In [2]:
import csv
import matplotlib.pyplot as mp
import numpy as np
import random as rand
%matplotlib


Using matplotlib backend: TkAgg


In [3]:
class subject:
    """One subject together with the test datapoints recorded for them.

    Attributes:
        sub_id: identifier passed to the constructor.
        datapoints: dict mapping each datapoint's ``test_id`` to the datapoint.
        num: counter incremented every time ``remove_data`` removes an entry.
        injury: string, empty by default.
        pain, gen, phys, men, act, mot: subjective scores, 0 by default.
        mean_<f>, std_<f>: per-subject mean and standard deviation of the
            datapoint attribute ``<f>`` for every ``<f>`` in ``_STAT_FIELDS``;
            filled in by ``setMeans`` and ``setStd``.
    """

    # Datapoint attributes for which mean_/std_ statistics are maintained.
    _STAT_FIELDS = ("mu1", "sigma1", "mu2", "sigma2", "SNR", "t0")

    def __init__(self, sub_id_):
        self.num = 0
        self.sub_id = sub_id_
        self.datapoints = {}
        self.injury = ""
        # subjective data
        self.pain = 0
        self.gen = 0
        self.phys = 0
        self.men = 0
        self.act = 0
        self.mot = 0
        # statistics over the datapoints (see setMeans / setStd)
        self.mean_mu1 = 0
        self.mean_sigma1 = 0
        self.mean_mu2 = 0
        self.mean_sigma2 = 0
        self.mean_SNR = 0
        self.mean_t0 = 0
        self.std_mu1 = 0
        self.std_mu2 = 0
        self.std_sigma1 = 0
        self.std_sigma2 = 0
        self.std_SNR = 0
        self.std_t0 = 0

    def remove_data(self, data_id):
        """Remove the datapoint stored under ``data_id``, if present.

        ``num`` is incremented for every datapoint actually removed; unknown
        ids are ignored.
        """
        if data_id in self.datapoints:
            self.datapoints.pop(data_id)
            self.num += 1

    def insert_data(self, data_):
        """Store ``data_`` under its ``test_id``, replacing any previous entry."""
        self.datapoints[data_.test_id] = data_

    def setMeans(self):
        """Recompute every ``mean_<f>`` from the current datapoints.

        All means are recomputed from scratch, so the method can be called
        repeatedly. With no datapoints every mean is set to 0.0.
        """
        points = list(self.datapoints.values())
        count = len(points)
        for field in self._STAT_FIELDS:
            if count:
                mean = sum(getattr(dat, field) for dat in points) / count
            else:
                mean = 0.0
            setattr(self, "mean_" + field, mean)

    def setStd(self):
        """Recompute every ``std_<f>`` as a population standard deviation.

        Uses the ``mean_<f>`` values currently stored on the instance, so
        ``setMeans`` must have been called first. The sum of squared
        deviations is divided by the number of datapoints (not N-1). With no
        datapoints every deviation is set to 0.0.
        """
        points = list(self.datapoints.values())
        count = len(points)
        for field in self._STAT_FIELDS:
            if count:
                mean = getattr(self, "mean_" + field)
                squares = sum((getattr(dat, field) - mean) ** 2 for dat in points)
                std = (squares / count) ** (1 / 2.0)
            else:
                std = 0.0
            setattr(self, "std_" + field, std)

    def __str__(self):
        """Return the subject id followed by one indented line per datapoint."""
        lines = ["subject id: " + str(self.sub_id)]
        lines.extend("\t" + str(point) for point in self.datapoints.values())
        return "\n".join(lines)

        
        
class data:
    """A single test record: a test id plus its numeric parameters.

    Every constructor argument may be a string (e.g. a CSV cell); ``test_id_``
    is converted with ``int`` and all other arguments with ``float``.
    ``sigsq1`` and ``sigsq2`` hold the squares of ``sigma1`` and ``sigma2``.
    """

    def __init__(self,test_id_,t0_,D1_,mu1_,sigma1_,D2_,mu2_,sigma2_,SNR_):
        self.test_id = int(test_id_)
        # Conversions run left to right, in the same order as the attribute
        # list, so a malformed value surfaces the same error as before.
        self.t0, self.D1, self.D2 = float(t0_), float(D1_), float(D2_)
        self.mu1, self.mu2 = float(mu1_), float(mu2_)
        self.sigma1, self.sigma2 = float(sigma1_), float(sigma2_)
        self.SNR = float(SNR_)
        # Derived values: squared sigmas.
        self.sigsq1, self.sigsq2 = self.sigma1**2, self.sigma2**2

    def __str__(self):
        """Return the fields space-separated, prefixed by a tab and a space."""
        fields = (
            self.test_id,
            self.t0,
            self.D1,
            self.mu1,
            self.sigma1,
            self.D2,
            self.mu2,
            self.sigma2,
            self.SNR,
        )
        return "\t " + " ".join(str(value) for value in fields)
        
class data_set:
    """Collection of subjects, keyed by subject id, built from CSV files.

    Attributes:
        subjects: dict mapping subject id to its ``subject`` instance.
        num: integer initialised to 0 (not modified by this class).
    """

    def __init__(self,handwriting_file="",fatigue_file="",medical_file=""):
        """Optionally populate the set from CSV files.

        Args:
            handwriting_file: CSV with a header row; column 0 is the subject
                id and columns 1-9 are passed, in order, to ``data``.
            fatigue_file: CSV with a header row; column 0 is the subject id
                and columns 3-8 are the pain, gen, phys, men, act and mot
                scores. When given, per-subject statistics are computed and
                subjects whose ``gen`` score is 0 are removed.
            medical_file: accepted for interface compatibility; not read.
        """
        self.subjects = {}
        self.num = 0

        if handwriting_file != "":
            self._load_handwriting(handwriting_file)
        if fatigue_file != "":
            self._load_fatigue(fatigue_file)
            self._compute_stats_and_prune()

    def _load_handwriting(self, handwriting_file):
        """Read one datapoint per non-empty row after the header."""
        # newline='' is what the csv module recommends for files it reads.
        with open(handwriting_file, mode='r', newline='') as h_file:
            reader = csv.reader(h_file)
            next(reader, None)  # skip the header row
            for row in reader:
                if not row:
                    continue  # blank line, e.g. at the end of the file
                dat = data(row[1], row[2], row[3], row[4], row[5],
                           row[6], row[7], row[8], row[9])
                self.add_data(row[0], dat)

    def _load_fatigue(self, fatigue_file):
        """Copy the subjective scores onto subjects that are already loaded."""
        with open(fatigue_file, mode='r', newline='') as f_file:
            reader = csv.reader(f_file)
            next(reader, None)  # skip the header row
            for row in reader:
                # Rows for subjects without handwriting data are ignored.
                if not row or row[0] not in self.subjects:
                    continue
                sub = self.subjects[row[0]]
                # An empty pain cell leaves the subject's current value as is.
                if row[3] != '':
                    sub.pain = int(row[3])
                sub.gen = int(row[4])
                sub.phys = int(row[5])
                sub.men = int(row[6])
                sub.act = int(row[7])
                sub.mot = int(row[8])

    def _compute_stats_and_prune(self):
        """Compute per-subject statistics, then drop subjects with gen == 0."""
        bad_ids = []
        for sub in self.subjects.values():
            if sub.gen == 0:
                print("removing subject ", sub.sub_id)
                bad_ids.append(sub.sub_id)
            sub.setMeans()
            sub.setStd()
        # Removal happens after the loop so the dict is not mutated while
        # it is being iterated.
        for sub_id in bad_ids:
            self.remove_subject(sub_id)

    def add_data(self,subject_id,data):
        """Attach ``data`` to ``subject_id``, creating the subject if needed."""
        if subject_id not in self.subjects:
            self.subjects[subject_id] = subject(subject_id)
        self.subjects[subject_id].insert_data(data)

    def remove_subject(self, subject_id):
        """Remove a subject; raises ``KeyError`` if the id is unknown."""
        self.subjects.pop(subject_id)

    def __str__(self):
        """Return a header line followed by the text of every subject."""
        lines = ["Contents of the dataset: "]
        lines.extend(str(sub) for sub in self.subjects.values())
        return "\n".join(lines)

    def divide(self,training_set_file,validation_set_file,validation_fraction=0.2):
        """Randomly split the datapoints and write both parts to CSV.

        Each datapoint independently goes to the validation set with
        probability ``validation_fraction`` and to the training set
        otherwise. Only the datapoints are written (see ``save``); the
        subjective scores are not part of the output.

        Args:
            training_set_file: path the training CSV is written to.
            validation_set_file: path the validation CSV is written to.
            validation_fraction: probability of a datapoint being assigned
                to the validation set.
        """
        trainingset = data_set()
        validationset = data_set()
        for sub in self.subjects.values():
            for dat in sub.datapoints.values():
                if rand.random() < validation_fraction:
                    validationset.add_data(sub.sub_id, dat)
                else:
                    trainingset.add_data(sub.sub_id, dat)
        validationset.save(validation_set_file)
        trainingset.save(training_set_file)

    def save(self,out_file):
        """Write one headerless CSV row per datapoint to ``out_file``.

        Columns: subject id, test id, t0, D1, mu1, sigma1, D2, mu2, sigma2,
        SNR.
        """
        # The with-statement closes the file; no explicit close is needed.
        with open(out_file, mode='w', newline='') as output:
            writer = csv.writer(output, delimiter=',')
            for sub in self.subjects.values():
                for dat in sub.datapoints.values():
                    writer.writerow([sub.sub_id, dat.test_id, dat.t0, dat.D1,
                                     dat.mu1, dat.sigma1, dat.D2, dat.mu2,
                                     dat.sigma2, dat.SNR])

    def get_set(self,label_type):
        """Return ``(labels, features)`` arrays with one entry per datapoint.

        Args:
            label_type: one of "pain", "gen", "phys", "men", "act", "mot";
                selects which subject score is used as the label.

        Returns:
            An int array of labels and a float array whose rows are
            ``[t0, mu1, sigma1, mu2, sigma2, SNR]`` for each datapoint.

        Raises:
            KeyError: if ``label_type`` is not a known label and at least
                one datapoint exists.
        """
        labels = []
        features = []
        for sub in self.subjects.values():
            label_types = {"pain": sub.pain, 'gen': sub.gen, "phys": sub.phys,
                           "men": sub.men, "act": sub.act, "mot": sub.mot}
            for dat in sub.datapoints.values():
                labels.append(label_types[label_type])
                features.append(np.array([dat.t0, dat.mu1, dat.sigma1,
                                          dat.mu2, dat.sigma2, dat.SNR]))
        return np.array(labels, dtype=int), np.array(features, dtype=float)
        