In [None]:
%matplotlib notebook
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os import listdir
from scipy.interpolate import griddata
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from mpl_toolkits.mplot3d import Axes3D
from scipy.optimize import leastsq

In [None]:
def CreateMasterSpiralDF(dirName="."):
    """Collect the header parameters of every '.spiral' file in a directory.

    Each file is read as: one line that is skipped, one line of whitespace
    separated parameter names, and one line with the matching numeric values.

    Args:
        dirName: directory to scan for files whose name ends in '.spiral'.

    Returns:
        A pandas DataFrame with one row per file, holding a "file_name" column
        (dirName + '/' + name) and one float column per parameter name.
        Returns None, after printing a message, when no '.spiral' file exists.
        If files disagree on their parameter names, the missing entries are
        left as NaN instead of aborting the scan.
    """
    assert isinstance(dirName, str), "The 'dirName' argument must be a string!"
    fileNames = [dirName + '/' + entry for entry in listdir(dirName)
                 if entry.endswith('.spiral')]

    if not fileNames:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("No '.spiral' files found in directory '" + dirName + "'.")
        return None

    records = []
    for name in fileNames:
        # 'with' guarantees the handle is released even if parsing fails.
        with open(name, 'r') as f:
            f.readline()                       # first line is not used
            colNames = f.readline().split()    # parameter names
            simParams = f.readline().split()   # parameter values
        record = {"file_name": name}
        for colIndex, colName in enumerate(colNames):
            record[colName] = float(simParams[colIndex])
        records.append(record)
    # Build the frame once from the collected records.
    return pd.DataFrame(records)

def CreateSpiralDF(fileName):
    """Load the time-series table stored in a '.spiral' analysis file.

    The first three lines of the file are skipped, the fourth line supplies the
    whitespace separated column names, and every following non-blank line is
    parsed as one row of floats.

    Args:
        fileName: path of the '.spiral' file.

    Returns:
        A pandas DataFrame with one float column per column name, or None
        (after printing a message) when the file cannot be opened.
    """
    assert isinstance(fileName, str), "The 'fileName' argument must be a string!"
    try:
        f = open(fileName, 'r')
    except IOError:
        # Only I/O failures are reported; anything else is a programming error
        # and should surface instead of being swallowed.
        print("Could not open file '" + fileName + "'.")
        return None
    with f:
        for _ in range(3):
            f.readline()
        colNames = f.readline().split()
        D = {colName: [] for colName in colNames}
        for line in f:
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            for colIndex, colName in enumerate(colNames):
                D[colName].append(float(fields[colIndex]))
    return pd.DataFrame(D)

def GetSpiralFrequencies(masterDF):
    """Add a "spiral_frequency" column to masterDF, in place.

    For every row the file named in "file_name" is loaded with CreateSpiralDF.
    If its last "time" value is not below maxTime, the row receives the value
    returned by GetSpiralFrequency; otherwise it stays 0.

    Args:
        masterDF: DataFrame with "file_name", "nsteps" and "nspec" columns.
            maxTime is derived from the first row's nsteps/nspec.

    Returns:
        None. masterDF is modified in place.
    """
    maxTime = int(masterDF["nsteps"].iloc[0] / masterDF["nspec"].iloc[0]) - 1
    freqs = np.zeros(len(masterDF.index))
    # Iterate by position so the result does not depend on the index labels
    # (the previous code mixed index labels with .iloc positions).
    for pos, fileName in enumerate(masterDF["file_name"]):
        df = CreateSpiralDF(fileName)
        if not (df["time"].iloc[-1] < maxTime):
            freqs[pos] = GetSpiralFrequency(df, maxTime)
    # A plain array is assigned positionally, whatever the index looks like.
    masterDF["spiral_frequency"] = freqs
    
def GetBendingEnergies(masterDF):
    """Add an "E_bend" column to masterDF, in place.

    A row receives the last "E_bend" value of its time series when the series
    reaches maxTime and the row's "spiral_frequency" is non-zero; every other
    row receives NaN.

    Args:
        masterDF: DataFrame with "file_name", "nsteps", "nspec" and
            "spiral_frequency" columns (the latter is created by
            GetSpiralFrequencies, which therefore has to run first).

    Returns:
        None. masterDF is modified in place.
    """
    maxTime = int(masterDF["nsteps"].iloc[0] / masterDF["nspec"].iloc[0]) - 1
    # Start from NaN so rows that do not qualify are explicitly "missing".
    ebend = np.full(len(masterDF.index), np.nan)
    # Positional access throughout; the previous code mixed labels and positions.
    for pos in range(len(masterDF.index)):
        df = CreateSpiralDF(masterDF["file_name"].iloc[pos])
        reachedEnd = not (df["time"].iloc[-1] < maxTime)
        if reachedEnd and masterDF["spiral_frequency"].iloc[pos]:
            ebend[pos] = df["E_bend"].iloc[-1]
    masterDF["E_bend"] = ebend

def GetSpiralFrequency(df,maxTime):
    """Return the dominant frequency of the second half of "tip_z_proj".

    The second half of the series (dropping one extra leading sample when the
    length is odd) is zero-padded to 2**17 points and Fourier transformed; the
    frequency of the largest magnitude in the non-negative half of the
    spectrum is selected, using a sample spacing of 1.

    Args:
        df: DataFrame with "time" and "tip_z_proj" columns.
        maxTime: number used for the cut-off below.

    Returns:
        The selected frequency when it exceeds 3.0/maxTime, otherwise 0.
    """
    assert isinstance(df,pd.DataFrame),"df needs to be pandas DataFrame type"
    # Floor division keeps N an int on Python 3 as well; a float here makes
    # the slices below raise TypeError.
    N = len(df["time"]) // 2
    delta = 1
    # Take the trailing N samples: that is [N:] for an even length and
    # [N+1:] for an odd one, matching the two-step selection used before.
    z = np.array(df["tip_z_proj"].iloc[len(df["time"]) - N:])
    # Zero pad data, assuming spiral continues indefinitely
    padding = 2**17
    yf = np.fft.fft(z, n=padding)
    xf = np.fft.fftfreq(padding, d=delta)
    freq = xf[np.argmax(2.0/padding * np.abs(yf[0:padding//2]))]
    # Frequencies at or below 3.0/maxTime are reported as "no spiral".
    if freq > 3.0/maxTime:
        return freq
    return 0
    
def PlotSpiralFrequencies(masterDF):
    """Call PlotSpiralFrequency for every run whose time series reaches maxTime.

    Args:
        masterDF: DataFrame with "file_name", "nsteps" and "nspec" columns.
            maxTime is derived from the first row's nsteps/nspec.

    Returns:
        None.
    """
    maxTime = int(masterDF["nsteps"].iloc[0] / masterDF["nspec"].iloc[0]) - 1
    # Iterate over the values directly; the previous code fed index labels to
    # .iloc, which only works for a default 0..n-1 index.
    for fileName in masterDF["file_name"]:
        df = CreateSpiralDF(fileName)
        # If we exited early, spiral did not form
        if not (df["time"].iloc[-1] < maxTime):
            PlotSpiralFrequency(df, maxTime)

def PlotSpiralFrequency(df,maxTime):
    """Plot the spectrum of "tip_z_proj" and a fitted sine for one run.

    The second half of "tip_z_proj" is zero-padded to 2**17 points and Fourier
    transformed; the frequency with the largest magnitude in the non-negative
    half of the spectrum is selected. A unit-amplitude sine at that frequency
    is then fitted to the data by least squares over its phase only.

    Args:
        df: DataFrame with "time" and "tip_z_proj" columns.
        maxTime: float or int; nothing is plotted when the selected frequency
            is below 1.0/maxTime.
            NOTE(review): GetSpiralFrequency uses 3.0/maxTime as its cut-off;
            confirm whether the two thresholds are meant to differ.

    Returns:
        None.
    """
    assert isinstance(df,pd.DataFrame),"df needs to be pandas DataFrame type"
    assert isinstance(maxTime, (float,int)), "maxTime needs to be a float or int"
    # Floor division keeps N an int on Python 3 as well; a float here makes
    # the slices below raise TypeError.
    N = len(df["time"]) // 2
    delta = 1
    time = np.array(df["time"].iloc[:N])
    z = np.array(df["tip_z_proj"].iloc[N:])
    # If we have odd time, make z and time same size
    if len(time) != len(z):
        z = np.array(df["tip_z_proj"].iloc[N+1:])
    # Zero pad data, assuming spiral continues indefinitely
    padding = 2**17
    yf = np.fft.fft(z, n=padding)
    xf = np.fft.fftfreq(padding, d=delta)
    freq = xf[np.argmax(2.0/padding * np.abs(yf[0:padding//2]))]
    if freq < 1.0/maxTime:
        return
    guess_phase = 0
    # Residual of a unit-amplitude sine at the selected frequency.
    optimize_func = lambda phase: np.sin(2*np.pi*freq*(time+phase))-z
    est_phase = leastsq(optimize_func, guess_phase)[0]
    est_data = np.sin(2*np.pi*freq*(time+est_phase))
    fig, ax = plt.subplots(2)
    ax[0].set_title(r"$f_{max} = $" + str(freq))
    ax[0].plot(xf,yf.real,xf,yf.imag)
    ax[0].set_xlim(0,2*freq)
    ax[0].set_xlabel("frequency")
    ax[0].set_ylabel("fft signal")
    ax[0].legend(["real","imag"],loc=1)
    ax[1].plot(time,z,'k-',time,est_data,'r--')
    ax[1].set_xlabel("sim time")
    ax[1].set_title("Filament tip orientation "+r'$U_Z$')
    ax[1].set_ylabel(r'$U_Z$')
    ax[1].set_xlim(0,min(8.0/freq,time[-1]))
    ax[1].legend(['Data','LS Fit'],loc=1)
    fig.subplots_adjust(hspace=0.7)
    plt.show()
    # Close this figure explicitly rather than whichever one is current.
    plt.close(fig)
    
def PlotFrequencyContour(masterDF, title='Spiral frequency, L=50', ylim=(0, 8000)):
    """Draw a filled contour map of "spiral_frequency" over driving and persistence length.

    The scattered (driving, persistence_length) samples are linearly
    interpolated onto a regular grid with as many points per axis as there are
    unique values in the corresponding column.

    Args:
        masterDF: DataFrame with "driving", "persistence_length" and
            "spiral_frequency" columns.
        title: figure title; the default reproduces the previous fixed title.
        ylim: (low, high) limits for the persistence-length axis, or None to
            let matplotlib choose; the default reproduces the previous limits.

    Returns:
        None.
    """
    df = masterDF
    x1 = np.linspace(df['driving'].min(), df['driving'].max(), len(df['driving'].unique()))
    y1 = np.linspace(df['persistence_length'].min(), df['persistence_length'].max(), len(df['persistence_length'].unique()))
    x2, y2 = np.meshgrid(x1, y1)
    # Interpolate unstructured D-dimensional data.
    z2 = griddata((df['driving'], df['persistence_length']), df['spiral_frequency'], (x2, y2), method='linear')
    fig, ax = plt.subplots()
    # rstride/cstride/linewidth are plot_surface options that contourf does
    # not use, so they are no longer passed.
    surf = ax.contourf(x2, y2, z2, cmap=cm.coolwarm, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_title(title)
    ax.set_xlabel("Driving")
    ax.set_ylabel("Persistence Length")
    if ylim is not None:
        ax.set_ylim(*ylim)
    plt.show()
    plt.close(fig)
    
def PlotEnergyContour(masterDF, title='Bending Energy, L=50', ylim=(0, 8000)):
    """Draw a filled contour map of "E_bend" over driving and persistence length.

    The scattered (driving, persistence_length) samples are linearly
    interpolated onto a regular grid with as many points per axis as there are
    unique values in the corresponding column.

    Args:
        masterDF: DataFrame with "driving", "persistence_length" and "E_bend"
            columns.
        title: figure title; the default reproduces the previous fixed title.
        ylim: (low, high) limits for the persistence-length axis, or None to
            let matplotlib choose; the default reproduces the previous limits.

    Returns:
        None.
    """
    df = masterDF
    x1 = np.linspace(df['driving'].min(), df['driving'].max(), len(df['driving'].unique()))
    y1 = np.linspace(df['persistence_length'].min(), df['persistence_length'].max(), len(df['persistence_length'].unique()))
    x2, y2 = np.meshgrid(x1, y1)
    # Interpolate unstructured D-dimensional data.
    z2 = griddata((df['driving'], df['persistence_length']), df['E_bend'], (x2, y2), method='linear')
    fig, ax = plt.subplots()
    # rstride/cstride/linewidth are plot_surface options that contourf does
    # not use, so they are no longer passed.
    surf = ax.contourf(x2, y2, z2, cmap=cm.coolwarm, antialiased=False)
    fig.colorbar(surf, shrink=0.5, aspect=5)
    ax.set_title(title)
    ax.set_xlabel("Driving")
    ax.set_ylabel("Persistence Length")
    if ylim is not None:
        ax.set_ylim(*ylim)
    plt.show()
    plt.close(fig)

def GetSpiralBool(masterDF):
    """Add a "spiral" column to masterDF, in place.

    The column holds 1.0 where "spiral_frequency" is greater than 0 and 0.0
    everywhere else.

    Args:
        masterDF: DataFrame with a "spiral_frequency" column.

    Returns:
        None. masterDF is modified in place.
    """
    # Vectorised comparison keeps the result aligned with masterDF's own index;
    # the previous loop fed index labels to .iloc and broke on any index that
    # was not 0..n-1.
    masterDF["spiral"] = (masterDF["spiral_frequency"] > 0).astype(float)

In [None]:
def FeatureScaling(X):
    """Standardise every column of X except column 0.

    Column 0 is left untouched (the callers in this notebook store the
    constant bias term there). A 1-D input, or a 2-D input with a single
    column, is returned as an unchanged copy.

    Args:
        X: numpy array, 1-D or 2-D.

    Returns:
        (Xreg, mu, sigma): a float copy of X with columns 1.. replaced by
        (x - mu) / sigma, plus the per-column mean and standard deviation of X
        taken along axis 0 (including column 0).
        NOTE: a column with zero spread has sigma == 0 and scales to NaN/inf,
        as before.
    """
    mu = np.mean(X, 0)
    sigma = np.std(X, 0)
    # Work on a float copy: scaling an integer array in place would silently
    # truncate the standardised values.
    Xreg = np.array(X, dtype=float)
    if Xreg.ndim == 2 and Xreg.shape[1] > 1:
        Xreg[:, 1:] = (Xreg[:, 1:] - mu[1:]) / sigma[1:]
    return Xreg, mu, sigma

def DeFeatureScaling(theta,mu,sigma):
    """Convert coefficients fitted on scaled features to raw-feature coefficients.

    For a linear score  theta[0] + sum_j theta[j] * (x_j - mu[j]) / sigma[j]
    the equivalent coefficients on the raw x_j are

        ds_theta[j] = theta[j] / sigma[j]                          (j >= 1)
        ds_theta[0] = theta[0] - sum_j theta[j] * mu[j] / sigma[j]

    The previous implementation returned theta*sigma + mu, which is the
    formula for un-scaling data values, not coefficients, so the result did
    not describe the same decision boundary.

    Args:
        theta: 1-D coefficients; entry 0 is the bias term.
        mu, sigma: per-column mean and standard deviation as returned by
            FeatureScaling. Entry 0 of each is ignored (column 0 is not scaled).

    Returns:
        A new 1-D float array of raw-feature coefficients. Inputs are not modified.
    """
    theta = np.asarray(theta, dtype=float)
    mu = np.asarray(mu, dtype=float)
    sigma = np.asarray(sigma, dtype=float)
    ds_theta = theta.copy()
    ds_theta[1:] = theta[1:] / sigma[1:]
    ds_theta[0] = theta[0] - np.sum(theta[1:] * mu[1:] / sigma[1:])
    return ds_theta

def FeatureScalingWith(theta,mu,sigma):
    """Standardise columns 1.. of a matrix with a given mean and spread.

    Column 0 is left untouched. A 1-D input, or a 2-D input with a single
    column, is returned as an unchanged copy.

    Args:
        theta: numpy array, 1-D or 2-D (despite the name, the callers in this
            notebook pass feature matrices).
        mu, sigma: per-column mean and standard deviation, indexable by column.

    Returns:
        A float copy of theta with columns 1.. replaced by (x - mu) / sigma.
    """
    # Float copy: scaling an integer array in place would truncate the result.
    thetaScaled = np.array(theta, dtype=float)
    if thetaScaled.ndim == 2 and thetaScaled.shape[1] > 1:
        thetaScaled[:, 1:] = (thetaScaled[:, 1:] - mu[1:]) / sigma[1:]
    return thetaScaled

def Sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), returned as a flattened 1-D array."""
    denominator = 1.0 + np.exp(-z)
    return (1.0 / denominator).flatten()

def LogisticCost(X,thetas,y):
    """Cross-entropy cost of a logistic model and its gradient.

    Args:
        X: (m, n) feature matrix.
        thetas: length-n coefficient vector.
        y: length-m vector of 0/1 labels.

    Returns:
        (cost, d_cost): the mean cross-entropy and its gradient with respect
        to thetas, X^T (h - y) / m with h = Sigmoid(X thetas).
    """
    m = np.shape(X)[0]
    hyp = Sigmoid(np.matmul(X,thetas))
    # When the sigmoid saturates to exactly 0 or 1, log() gives -inf and the
    # products 0 * -inf turn the whole cost into NaN. Clip only the values fed
    # to log(); the gradient keeps using the unclipped probabilities.
    eps = np.finfo(float).eps
    hypSafe = np.clip(hyp, eps, 1.0 - eps)
    cost = -(np.matmul(y,np.log(hypSafe)) + np.matmul(1-y,np.log(1-hypSafe)))/m
    d_cost = np.matmul(X.transpose(),hyp-y)/m
    return cost, d_cost

def GradientDescentLogistic(X,y,rate,iterations):
    """Fit logistic-regression coefficients by fixed-step gradient descent.

    Args:
        X: (m, n) feature matrix.
        y: length-m vector of 0/1 labels.
        rate: step size multiplying the gradient at every update.
        iterations: number of update steps to perform.

    Returns:
        (thetas, costs): the final length-n coefficient vector (started from
        zeros) and the list of costs evaluated before each update.
    """
    thetas = np.zeros(np.shape(X)[1])
    costs = []
    # range() instead of xrange() so the loop also runs on Python 3.
    for _ in range(iterations):
        cost, d_cost = LogisticCost(X,thetas,y)
        thetas = thetas - rate * d_cost
        costs.append(cost)
    return thetas,costs

def PlotCosts(costs):
    """Plot the cost values against their position in the sequence, then close the figure."""
    axes = plt.gca()
    axes.plot(costs)
    axes.set_xlabel("Iterations")
    axes.set_ylabel("Cost function")
    plt.show()
    plt.close()
    
def TestPrediction(X_test,thetas,y_test):
    """Return the fraction of rows whose predicted class matches y_test.

    A row is predicted as class 1 when Sigmoid(X_test . thetas) is not below
    0.5, and as class 0 otherwise.

    Args:
        X_test: (m, n) feature matrix.
        thetas: length-n coefficient vector.
        y_test: length-m sequence of 0/1 labels, indexable by position.

    Returns:
        Accuracy as a float in [0, 1].
    """
    probabilities = Sigmoid(np.matmul(X_test,thetas))
    predictions = [0 if p < 0.5 else 1 for p in probabilities]
    tot = len(predictions)
    # range() instead of xrange() so the count also runs on Python 3.
    miss = sum(1 for i in range(tot) if predictions[i] != y_test[i])
    return 1.0 - float(miss)/tot

In [None]:
# Split the rows of `df`, in stored order, into training / cross-validation /
# test sets.
# NOTE(review): `df` is not created in any cell visible here; it presumably
# comes from an earlier kernel state and needs "ones", "length",
# "persistence_length", "driving" and "spiral" columns -- confirm.
training_fraction = 0.6
cross_validation_fraction = 0.2
assert (training_fraction + cross_validation_fraction <= 1)
feature_columns = ["ones","length","persistence_length","driving"]
# m is the first row after the training set and n the first row after the
# cross-validation set. Previously n was only the *size* of the
# cross-validation set, so the slice m:n-1 was empty and the test slice n:
# overlapped the training rows.
m = int(round(training_fraction*df.shape[0]))
n = m + int(round(cross_validation_fraction*df.shape[0]))
# Positional .iloc slices and .values replace .ix / .as_matrix, which were
# removed from pandas.
X = df[feature_columns].iloc[:m].values
y = df["spiral"].iloc[:m].values
X_cv = df[feature_columns].iloc[m:n].values
y_cv = df["spiral"].iloc[m:n].values
X_test = df[feature_columns].iloc[n:].values
y_test = df["spiral"].iloc[n:].values

In [None]:
# Fit the logistic model on the standardised training set and report the
# accuracy on the test set.
X_scaled, mu, sigma = FeatureScaling(X)
# NOTE: X_cv and X_test are overwritten with their scaled versions, so
# re-running this cell without re-running the split cell scales them twice.
X_cv = FeatureScalingWith(X_cv,mu,sigma)
X_test = FeatureScalingWith(X_test,mu,sigma)
# Step size 3 for 150 gradient-descent iterations.
thetas,costs = GradientDescentLogistic(X_scaled,y,3,150)
accuracy = TestPrediction(X_test,thetas,y_test)
thetas_descaled = DeFeatureScaling(thetas,mu,sigma)
# Single-argument print(...) produces the same output on Python 2 and 3.
print("Accuracy: " + str(accuracy))
print("Thetas: " + str(thetas))
print("Sigma: " +str(sigma))
print("Mu: " + str(mu))
print("Thetas: " + str(thetas_descaled))
#PlotCosts(costs)

In [None]:
# Build the master table from the '.spiral' files in 'spiral_fixed_dr', then
# add the derived columns in place. The order matters: GetBendingEnergies and
# GetSpiralBool both read the "spiral_frequency" column that
# GetSpiralFrequencies creates.
fixed_dr_df = CreateMasterSpiralDF('spiral_fixed_dr')
GetSpiralFrequencies(fixed_dr_df)
GetBendingEnergies(fixed_dr_df)
GetSpiralBool(fixed_dr_df)

In [None]:
# Display the whole master table for inspection.
fixed_dr_df

In [None]:
# Reduce the master table to the model inputs, add the constant bias column,
# and split the rows, in stored order, into training / cross-validation /
# test sets.
# .loc replaces the removed .ix; .copy() makes fixed_dr an independent frame so
# adding a column does not trigger SettingWithCopyWarning.
fixed_dr = fixed_dr_df.loc[:,["length","persistence_length","spiral"]].copy()
fixed_dr['ones'] = np.ones(fixed_dr.shape[0])
training_fraction = 0.6
cross_validation_fraction = 0.2
assert (training_fraction + cross_validation_fraction <= 1)
feature_columns = ["ones","length","persistence_length"]
# m is the first row after the training set and n the first row after the
# cross-validation set. Previously n was only the *size* of the
# cross-validation set, so the slice m:n-1 was empty and the test slice n:
# overlapped the training rows.
m = int(round(training_fraction*fixed_dr.shape[0]))
n = m + int(round(cross_validation_fraction*fixed_dr.shape[0]))
# Positional .iloc slices and .values replace .ix / .as_matrix, which were
# removed from pandas.
X = fixed_dr[feature_columns].iloc[:m].values
y = fixed_dr["spiral"].iloc[:m].values
X_cv = fixed_dr[feature_columns].iloc[m:n].values
y_cv = fixed_dr["spiral"].iloc[m:n].values
X_test = fixed_dr[feature_columns].iloc[n:].values
y_test = fixed_dr["spiral"].iloc[n:].values

In [None]:
# Fit the logistic model on the standardised training set and report the
# accuracy on the test set.
X_scaled, mu, sigma = FeatureScaling(X)
# NOTE: X_cv and X_test are overwritten with their scaled versions, so
# re-running this cell without re-running the split cell scales them twice.
X_cv = FeatureScalingWith(X_cv,mu,sigma)
X_test = FeatureScalingWith(X_test,mu,sigma)
# Step size 3 for 150 gradient-descent iterations.
thetas,costs = GradientDescentLogistic(X_scaled,y,3,150)
accuracy = TestPrediction(X_test,thetas,y_test)
thetas_descaled = DeFeatureScaling(thetas,mu,sigma)
# Single-argument print(...) produces the same output on Python 2 and 3.
print("Accuracy: " + str(accuracy))
print("Thetas: " + str(thetas))
print("Sigma: " +str(sigma))
print("Mu: " + str(mu))
print("Thetas: " + str(thetas_descaled))
#PlotCosts(costs)

In [None]:
# Scatter the runs in the (length, persistence length) plane, coloured by
# whether a spiral was detected, and overlay the fitted decision boundary.
feature_columns = ["ones","length","persistence_length"]
# .values replaces the removed DataFrame.as_matrix.
nospirals = fixed_dr[fixed_dr.spiral==0][feature_columns].values
spirals = fixed_dr[fixed_dr.spiral!=0][feature_columns].values

# Taking the range from the whole column also works when one class is empty.
xmin = fixed_dr["length"].min()
xmax = fixed_dr["length"].max()
XX = np.linspace(xmin,xmax,100)
coeff = thetas
# `thetas` was fitted on standardised features while the points below are in
# raw units, so the boundary  coeff . x_scaled = 0  is mapped back to raw
# coordinates with the training mu / sigma before it is drawn.
YY = lambda x: mu[2] - sigma[2] * (coeff[0] + coeff[1] * (x - mu[1]) / sigma[1]) / coeff[2]

plot = plt.figure().gca()
ax = plot
ax.plot(XX,YY(XX),'r--')
# Axes.hold() no longer exists (repeated calls draw on the same axes by
# default) and a 2-D axes has no set_zlabel(), so both calls were dropped.
ax.scatter(nospirals[:,1],nospirals[:,2], c='r', marker='x')
ax.scatter(spirals[:,1],spirals[:,2], c='b', marker='o')
ax.set_xlabel('Aspect Ratio')
ax.set_ylabel('Persistence Length')

plt.show()


In [None]:
# Evaluate the fitted model by hand on one sample.
# NOTE(review): FeatureScaling returns a 1-D input unchanged, so the sample is
# not actually standardised here, and its second and third return values are
# the mean and the standard deviation in that order (bound to `s` and `M`).
# The loop also needs `thetas` to hold at least four coefficients -- confirm
# which fitted model this cell is meant to follow.
mythetas = np.array([1,50,50,50])
mythetas_scaled, s, M = FeatureScaling(mythetas)
coeff = thetas
var=0
# range() instead of xrange() so the loop also runs on Python 3.
for i in range(4):
    var += mythetas_scaled[i]*coeff[i]
Sigmoid(var)

In [None]:
# Scatter the runs in (length, persistence length, driving) space, coloured by
# whether a spiral was detected, and overlay the fitted decision plane.
# NOTE(review): `df` is not created in any cell visible here, and the plane
# needs a four-coefficient `thetas_descaled` -- confirm which cells must run
# before this one.
feature_columns = ["ones","length","persistence_length","driving"]
# .values replaces the removed DataFrame.as_matrix.
nospirals = df[df.spiral==0][feature_columns].values
spirals = df[df.spiral!=0][feature_columns].values

xmin = min(min(nospirals[:,1]),min(spirals[:,1]))
xmax = max(max(nospirals[:,1]),max(spirals[:,1]))
ymin = min(min(nospirals[:,2]),min(spirals[:,2]))
ymax = max(max(nospirals[:,2]),max(spirals[:,2]))
x = np.linspace(xmin,xmax,100)
y = np.linspace(ymin,ymax,100)
XX,YY = np.meshgrid(x,y)
coeff = thetas_descaled
# Plane coeff . (1, x, y, z) = 0 solved for z.
ZZ = - ( coeff[0] + coeff[1] * XX + coeff[2] * YY ) / coeff[3]

# add_subplot(projection='3d') replaces gca(projection='3d'), which newer
# matplotlib releases no longer accept.
plt3d = plt.figure().add_subplot(111, projection='3d')
plt3d.plot_surface(XX, YY, ZZ, alpha=0.2)

# Draw the points on the same 3-D axes as the plane.
ax = plt3d
ax.scatter(nospirals[:,1],nospirals[:,2],nospirals[:,3], c='r', marker='x')
ax.scatter(spirals[:,1],spirals[:,2],spirals[:,3], c='b', marker='o')
ax.set_xlabel('Aspect Ratio')
ax.set_ylabel('Persistence Length')
ax.set_zlabel('Driving')

plt.show()


In [None]:
# Inspect column 1 of the no-spiral feature matrix ("length" in the column
# lists used to build it).
nospirals[:,1]

In [None]:
# Predict the spiral probability for one (length, persistence length, driving)
# sample.
# The sample needs the leading bias entry to line up with mu / sigma / thetas,
# and only the real features are standardised: entry 0 of sigma belongs to the
# constant bias column, so dividing by it is meaningless.
# NOTE(review): this requires the four-feature model (ones, length,
# persistence_length, driving) to be the one currently held in
# mu / sigma / thetas -- confirm the cell order.
LLpDr = np.array([1.0, 50.0, 1000.0, 30.0])
LLpDr[1:] = (LLpDr[1:] - mu[1:])/sigma[1:]
p = Sigmoid(np.dot(LLpDr,thetas))
# Single-argument print(...) produces the same output on Python 2 and 3.
print("Prediction: " +str(p))

In [None]:
#PlotSpiralFrequencies(masterDF)

In [None]:

# NOTE(review): scratch cell for a three-parameter sine fit
# (amplitude * sin(x + phase) + offset). It reads `omega`, `xf`, `delta`,
# `data`, `guess_std`, `guess_phase`, `guess_mean` and `t` as globals, and none
# of them is assigned at module level in the cells visible here (`xf`, `delta`
# and `guess_phase` only exist as locals inside the frequency helpers, and the
# lines that would define `t` and `data` are commented out below). Presumably
# it raises NameError on a fresh kernel -- confirm before relying on it.
#N = 1000 # number of data points
#t = np.linspace(0, 4*np.pi, N)
#data = 3.0*np.sin(t+0.001) + 0.5 + np.random.randn(N) # create artificial data with noise

# Curve plotted below under the label 'first guess'.
data_first_guess = np.sin(omega*xf*delta*10)

# Residual minimised by leastsq: model minus data, with x = [amplitude, phase, offset].
# NOTE(review): the residual is evaluated on `xf`, while the fitted curve
# further down is rebuilt on `t` -- confirm which abscissa is intended.
optimize_func = lambda x: x[0]*np.sin(xf+x[1]) + x[2] - data
est_std, est_phase, est_mean = leastsq(optimize_func, [guess_std, guess_phase, guess_mean])[0]

# Recreate the fitted curve using the optimized parameters.
data_fit = est_std*np.sin(t+est_phase) + est_mean

plt.plot(data, '.')
plt.plot(data_fit, label='after fitting')
plt.plot(data_first_guess, label='first guess')
plt.legend()
plt.show()


In [None]:
# Scratch check: display a Series holding the integers 0..9.
pd.Series(np.arange(10))

In [None]:
# Build a persistence-length x driving table that starts at 1 everywhere and
# is cleared to 0 for every parameter pair whose run has a data row with a
# second field below pi.
# The previous first three statements (`range()` without arguments and
# `open("")`) raised before anything else could run and were removed.

# set up index 'pointers' for persistence length and driving
lpIndex = {lp: position for position, lp in enumerate(range(50, 1001, 50))}
drIndex = {dr: position for position, dr in enumerate(range(0, 101, 5))}

array = [[1 for i in range(len(drIndex))] for j in range(len(lpIndex))]

length = 50


def _clear_low_value_runs(num_files, max_child_length=None):
    """Scan runs 0..num_files-1 and clear `array` entries as described above.

    Each file is read as: two skipped lines, one space separated parameter
    line (field 1 = child length, field 2 = persistence length, field 3 =
    driving), one more skipped line, then the data rows.

    Args:
        num_files: number of consecutive run indices to read, starting at 0.
        max_child_length: when given, runs whose child length exceeds it are
            skipped without touching `array`.
    """
    for var in range(num_files):
        fname = "spiral_nostoch_l"+'{:03d}'.format(length)+"_v"+'{:03d}'.format(var)+"_filament.spiral"
        # 'with' closes every file; previously only skipped files were closed.
        with open(fname, 'r') as f:
            f.readline()
            f.readline()
            params = f.readline().split(' ')
            childLength = float(params[1])
            persistenceLength = int(params[2])
            driving = int(params[3])
            if max_child_length is not None and childLength > max_child_length:
                continue
            f.readline()
            for row in f:
                if float(row.split(' ')[1]) < np.pi:
                    array[lpIndex[persistenceLength]][drIndex[driving]] = 0
                    break


# First pass: runs 0..899, ignoring runs with a child length above 4.
_clear_low_value_runs(900, max_child_length=4)
# Second pass: runs 0..419 with no child-length filter, as in the original
# second loop.
# NOTE(review): this re-reads a subset of the files from the first pass --
# confirm that dropping the filter here is intentional.
_clear_low_value_runs(420)