In [41]:
#%matplotlib inline
import pandas as pd
import numpy as np
import os
from os.path import basename
import datetime
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import glob
import math

In [2]:
# Directory holding the raw CSV exports; the next cell lists this directory
# and reshapes every non-hidden file it finds there.
path = r'/Users/mathuser/Documents/Perl Lab Rat Data/Bethany_Pasta'

In [3]:
# Reshape every raw export in `path` into one table per subject and save it.
#
# `files` maps subject id -> accumulated DataFrame with the columns
# Score / Date / Time ended / Initial. When several raw files belong to the
# same subject their rows are stacked in os.listdir() order, and the
# subject's CSV is rewritten after each file so that the last write contains
# everything seen for that subject.
files = {}
for filename in os.listdir(path):
    # skip hidden files such as .DS_Store
    if filename.startswith("."):
        continue
    # The subject id must be the 4th underscore-separated token of the file
    # name (taken from the part before the first "."), and the file is
    # expected to have exactly 2 columns.
    subject_id = (filename.split(".")[0]).split("_")[3]
    # bring in the csv as two columns and delete rows with empty values
    df = pd.read_csv(
        os.path.join(path, filename), header=None, names=["col0", "col1"]
    ).dropna(axis=0, how="any")
    # Every 4 consecutive values of col1 become one row: score, date, time
    # and initial. reshape() raises ValueError if the number of values is
    # not a multiple of 4.
    df2 = pd.DataFrame(
        df["col1"].values.reshape(-1, 4),
        columns=["Score", "Date", "Time ended", "Initial"],
    )
    if subject_id in files:
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent and, like append, keeps each frame's own index.
        files[subject_id] = pd.concat([files[subject_id], df2])
    else:
        files[subject_id] = df2
    # save the accumulated table as a new CSV
    df2 = files[subject_id]
    df2.to_csv("/Users/mathuser/Documents/Perl Lab Rat Data/reshape/"+ subject_id + "_reshaped.csv")

In [4]:
# Weekly mean score per subject over ALL sessions (training included).
# For every reshaped file this writes <subject>_pasta_mean.csv and a scatter
# plot of mean score against week number.
path2 = "/Users/mathuser/Documents/Perl Lab Rat Data/reshape/"
for filename in os.listdir(path2):
    # skip hidden files such as .DS_Store
    if filename.startswith("."):
        continue
    subject_id = (filename.split(".")[0]).split("_")[0]
    df3 = pd.read_csv(path2 + "//" + filename, header = 0, names = ["Score","Date","Time ended","Initial"])
    # Parse the dates once. The earliest session must be found on real dates:
    # min() over the raw "%m/%d/%y" strings compares them lexicographically,
    # which picks the wrong day across a year boundary ("01/05/18" sorts
    # before "12/01/17") or when months are not zero-padded.
    session_dates = pd.to_datetime(df3["Date"], format="%m/%d/%y")
    # Delta = whole days elapsed since the subject's first session
    df4 = df3.assign(Delta=(session_dates - session_dates.min()).dt.days)
    # Week = Delta in blocks of 5 days (Delta is never negative, so floor
    # division matches the int(Delta / 5) used before).
    # NOTE(review): Delta counts calendar days, weekends included, yet a
    # "week" is 5 days long here - confirm this is the intended binning.
    df4 = df4.assign(Week=df4["Delta"] // 5)
    # mean score of each week
    df4["Score"] = df4["Score"].astype("int")
    df5 = df4.groupby("Week",as_index=False)["Score"].mean()
    df5.to_csv("/Users/mathuser/Documents/Perl Lab Rat Data/pasta_mean/"+ subject_id + "_pasta_mean.csv")
    # make a scatter plot and save it
    graph = df5.plot(kind="scatter", x="Week", y="Score",title = subject_id)
    fig = graph.get_figure()
    fig.savefig("/Users/mathuser/Documents/Perl Lab Rat Data/graphs/"+subject_id+"_plot.png")
    # close the figure so figures do not pile up across subjects
    plt.close("all")

In [5]:
# Restrict each subject's sessions to the window that starts after the
# baseline cut-off (derived from the injury date) and ends on the subject's
# end date, then save the result as <subject>_date_restricted.csv.
path2 = "/Users/mathuser/Documents/Perl Lab Rat Data/reshape/"
refpath = "/Users/mathuser/Documents/Perl Lab Rat Data"
# The reference sheet is the same for every subject, so read it once instead
# of once per file.
refdf = pd.read_csv(refpath + "//subject_ref.csv", header = 0)
for filename in os.listdir(path2):
    # skip hidden files such as .DS_Store
    if filename.startswith("."):
        continue
    subject_id = (filename.split(".")[0]).split("_")[0]
    #skip AA05 because there's only 1 data point right now
    if subject_id == "AA05":
        continue
    # Row(s) of the reference sheet for this subject. Column 2 is parsed as
    # the injury date; column 5 is parsed as the end date and may be empty.
    subject_ref = refdf[refdf.subject_id.isin([subject_id])]
    injure_date = datetime.strptime(subject_ref.iloc[0,2],"%m/%d/%y")
    # weekday() is 0 for Monday ... 4 for Friday; weekend injuries are skipped
    if injure_date.weekday() > 4:
        print("Not a weekday "+subject_id)
        continue
    #currently, most of the baseline week only has 2-3 days, so I'm just going to keep it the way it is
    # The cut-off is the Monday of the week before the injury week:
    # 7 days back for a Monday injury, 8 for Tuesday, ... 11 for Friday.
    baseline = injure_date - timedelta(days=7 + injure_date.weekday())
    datadf = pd.read_csv(path2 + "//" + subject_id + "_reshaped.csv", header = 0, names = ["Score","Date","Time ended","Initial"])
    # Parse before taking the maximum: max() over the raw "%m/%d/%y" strings
    # is lexicographic and can return a date that is not the latest one
    # ("12/30/17" sorts after "01/15/18").
    datadf['Date'] = pd.to_datetime(datadf['Date'], format="%m/%d/%y")
    if pd.isnull(subject_ref.iloc[0,5]):
        # no end date recorded: keep everything up to the last session
        end_date = datadf['Date'].max()
    else:
        end_date = datetime.strptime(subject_ref.iloc[0,5],"%m/%d/%y")
    # the cut-off day itself is excluded, the end date is included
    mask = (datadf['Date'] > baseline) & (datadf['Date'] <= end_date)
    datadf = datadf.loc[mask]
    datadf = datadf.reset_index(drop=True)
    datadf.to_csv(refpath+"/date_restricted/"+subject_id+"_date_restricted.csv")

In [6]:
# Weekly mean and variance of the date-restricted ("cut") data.
# For every file in date_restricted/ this writes the per-session table with
# Delta/Week columns, the per-week mean/variance table and a scatter plot.
path3 = "/Users/mathuser/Documents/Perl Lab Rat Data/date_restricted/"
for filename in os.listdir(path3):
    # skip hidden files such as .DS_Store
    if filename.startswith("."):
        continue
    subject_id = (filename.split(".")[0]).split("_")[0]
    #skip these IDs for now because files are empty
    #if subject_id == "B04" or subject_id =="AA06" or subject_id == "AA01" or subject_id == "B06":
        #continue
    #read file
    datadf = pd.read_csv(path3+"//"+subject_id+"_date_restricted.csv", header = 0)
    # Columns are addressed by name rather than by position, and the earliest
    # date is computed once instead of once per row.
    session_dates = pd.to_datetime(datadf["Date"], format="%Y-%m-%d")
    # Delta = whole days since the first retained session
    datadf = datadf.assign(Delta=(session_dates - session_dates.min()).dt.days)
    # Week = Delta in blocks of 5 days (Delta is never negative, so floor
    # division matches the int(Delta / 5) used before).
    # NOTE(review): Delta counts calendar days, weekends included, yet a
    # "week" is 5 days long here - confirm this is the intended binning.
    datadf = datadf.assign(Week=datadf["Delta"] // 5)
    #overwrite baseline week based on day of week?
    datadf.to_csv("/Users/mathuser/Documents/Perl Lab Rat Data/cut_week_calcs/"+ subject_id + "_cut_week_calc.csv")
    # mean and variance of the score within each week
    datadf["Score"] = datadf["Score"].astype("int")
    weekly_scores = datadf.groupby("Week", as_index=False)["Score"]
    meandf = weekly_scores.mean()
    vardf = weekly_scores.var().rename(columns = {"Score":"Variance"})
    #combine mean and variance
    statdf = pd.concat([meandf,vardf["Variance"]], axis=1)
    #save to csv
    statdf.to_csv("/Users/mathuser/Documents/Perl Lab Rat Data/new_pasta_mean/"+ subject_id + "_new_pasta_mean.csv")
    #make a scatter plot of cut data
    # NOTE(review): the error bars are drawn from the variance, not the
    # standard deviation - confirm that is what the figure should show.
    graph = statdf.plot(kind="scatter", x="Week", y="Score",title = subject_id, yerr="Variance")
    fig = graph.get_figure()
    fig.savefig("/Users/mathuser/Documents/Perl Lab Rat Data/cut_graphs/"+subject_id+"_cut_plot.png")
    # close the figure so figures do not pile up across subjects
    plt.close("all")
print("Done!")

Done!


In [7]:
# Split the subjects into two id lists according to the reference sheet.
# `stimdf` starts out as an empty frame here.
stimdf = pd.DataFrame()
refdf = pd.read_csv(refpath + "//subject_ref.csv", header = 0)
cutpath = "/Users/mathuser/Documents/Perl Lab Rat Data/cut_week_calcs/"
# Populate the stim and no-stim lists: column 0 supplies the id that is
# stored, and a row goes to `stim` only when its column 6 equals 1; every
# other row (including rows where column 6 is empty) goes to `nostim`.
ref_ids = refdf.iloc[:, 0]
is_stim = refdf.iloc[:, 6] == 1
stim = ref_ids[is_stim].tolist()
nostim = ref_ids[~is_stim].tolist()

In [8]:
# Write one <subject>_ind.csv per subject listed in `stim`, then merge all
# of those files into a single table and scatter plot.
for filename in os.listdir(cutpath):
    # skip hidden files such as .DS_Store
    if filename.startswith("."):
        continue
    subject_id = (filename.split(".")[0]).split("_")[0]
    # only subjects in the stim list get a file here
    if subject_id not in stim:
        continue
    # read the cut file and keep the three columns of interest, with the
    # subject id placed in front of them
    datadf = pd.read_csv(cutpath + subject_id + "_cut_week_calc.csv")
    stimsubdf = datadf[["Week", "Date", "Score"]].copy()
    stimsubdf.insert(0, "subject_id", subject_id)
    stimsubdf.to_csv("/Users/mathuser/Documents/Perl Lab Rat Data/stim_ind_file/" + subject_id + "_ind.csv")
# define paths
indpath = "/Users/mathuser/Documents/Perl Lab Rat Data/stim_ind_file/"
allFiles = glob.glob(indpath + "*.csv")
# open every csv found in the stim folder and stack them into one frame
stimdf = pd.concat(
    (pd.read_csv(one_file) for one_file in allFiles), ignore_index=True
).reset_index(drop=True)
# the first column is the index that was saved with each individual file
stimdf = stimdf.drop(columns=[stimdf.columns[0]])
# save all as csv
stimdf.to_csv(refpath + "/" + "all_stim.csv")
# make the graph from the DataFrame
stimgraph = stimdf.plot.scatter(x="Week", y="Score", title="All Stim")
stimfig = stimgraph.get_figure()
stimfig.savefig(refpath+"/all_stim.png")
plt.close("all")

In [9]:
cutpath = "/Users/mathuser/Documents/Perl Lab Rat Data/cut_week_calcs/"
# Write one <subject>_ind.csv per subject listed in `nostim`, then merge all
# of those files into a single table and scatter plot.
# `nostimdf` starts out as an empty frame and is replaced further down.
nostimdf = pd.DataFrame()
for filename in os.listdir(cutpath):
    # skip hidden files such as .DS_Store
    if filename.startswith("."):
        continue
    subject_id = (filename.split(".")[0]).split("_")[0]
    # only subjects in the no-stim list get a file here
    if subject_id not in nostim:
        continue
    # read the cut file and keep the three columns of interest, with the
    # subject id placed in front of them
    datadf = pd.read_csv(cutpath + subject_id + "_cut_week_calc.csv")
    nostimsubdf = datadf[["Week", "Date", "Score"]].copy()
    nostimsubdf.insert(0, "subject_id", subject_id)
    nostimsubdf.to_csv("/Users/mathuser/Documents/Perl Lab Rat Data/no_stim_ind_file/" + subject_id + "_ind.csv")
# set paths
no_indpath = "/Users/mathuser/Documents/Perl Lab Rat Data/no_stim_ind_file/"
nostimFiles = glob.glob(no_indpath + "*.csv")
# open every csv found in the no stim folder and stack them into one frame
nostimdf = pd.concat(
    (pd.read_csv(one_file) for one_file in nostimFiles), ignore_index=True
).reset_index(drop=True)
# the first column is the index that was saved with each individual file
nostimdf = nostimdf.drop(columns=[nostimdf.columns[0]])
# save as csv
nostimdf.to_csv(refpath + "/" + "all_nostim.csv")
# make a plot
nostimgraph = nostimdf.plot.scatter(x="Week", y="Score", title="All No Stim")
nostimfig = nostimgraph.get_figure()
nostimfig.savefig(refpath+"/no_stim.png")
plt.close("all")

In [45]:
# Stimulation therapy comparison: renumber every subject's weeks relative to
# that subject's stim_start_date (sessions before the start date get a
# negative week), write one file per subject into therapy_weeks/, and merge
# all of them into all_therapy_aligned.csv.
therapy_dates = dict(zip(refdf.subject_id,refdf.stim_start_date.values))
therapypath = "/Users/mathuser/Documents/Perl Lab Rat Data/therapy_weeks/"
# The stim and no-stim folders are processed identically, stim folder first.
for source_dir in (indpath, no_indpath):
    for filename in os.listdir(source_dir):
        # skip hidden files such as .DS_Store
        if filename.startswith("."):
            continue
        subject_id = (filename.split(".")[0]).split("_")[0]
        # NOTE(review): strptime needs a string, so this raises TypeError
        # for any subject whose stim_start_date is empty in the reference
        # sheet - confirm every subject (no-stim ones too) has a start date.
        therapy_date = datetime.strptime(therapy_dates[subject_id],"%m/%d/%y")
        therapydf = pd.read_csv(source_dir+filename)
        therapydf = therapydf.reset_index(drop=True)
        # Week = whole days since therapy start, floored into 5-day blocks;
        # floor division keeps the math.floor behaviour for negative deltas.
        session_dates = pd.to_datetime(therapydf["Date"], format="%Y-%m-%d")
        therapydf["Week"] = (session_dates - therapy_date).dt.days // 5
        # the first column is the index saved with the individual file
        therapydf = therapydf.drop(therapydf.columns[0],axis=1)
        therapydf.to_csv(therapypath +  subject_id + ".csv")
therFiles = glob.glob(therapypath + "*.csv")
#open all files in the therapy folder
ther_from_each_file = (pd.read_csv(f) for f in therFiles)
#concatenate all the files
alltherdf = pd.concat(ther_from_each_file,ignore_index=True)
# The combined frame used to be assigned to `alltherdf` while the following
# lines read the undefined name `allther`, which raised NameError.
allther = alltherdf.reset_index(drop=True)
allther = allther.drop(allther.columns[0],axis=1)
#save as csv
allther.to_csv(refpath + "/" + "all_therapy_aligned.csv")

Unnamed: 0.1,Unnamed: 0,subject_id,Week,Date,Score
0,0,A08,-6,2018-06-20,59
1,1,A08,-6,2018-06-21,58
2,2,A08,-3,2018-07-09,3
3,3,A08,-2,2018-07-10,2
4,4,A08,-2,2018-07-12,3
5,5,A08,-1,2018-07-16,0
6,6,A08,-1,2018-07-17,0
7,7,A08,-1,2018-07-18,0
8,8,A08,-1,2018-07-19,0
9,9,A08,0,2018-07-20,0
