## Import Libs

In [1]:
import pandas as pd
import datetime as dt
from datetime import timedelta
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy
from scipy import stats
import os
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time

### Helper functions

In [2]:
def find_ims(patient,directory):
    '''Look for a patient's IMS file under ``directory`` and return its path.

    The top level of ``directory`` is searched first, then its ``first_ims``
    and ``second_ims`` sub-folders, in that order. A file matches when
    ``patient`` occurs anywhere in its name. Within a folder, names are
    checked in sorted order so the result does not depend on the order in
    which the filesystem happens to list them.

    patient   -- patient identifier to look for, e.g. 'EC137'
    directory -- folder to search

    Returns ``directory + '/' + name`` for a top-level match,
    ``directory + '/first_ims/' + name`` or ``directory + '/second_ims/' + name``
    for a sub-folder match, and None when nothing matches.
    '''
    # Always search the folder supplied by the caller.
    for subfolder in ('', 'first_ims/', 'second_ims/'):
        folder = directory + '/' + subfolder
        # A missing sub-folder just means there is nothing to find there.
        if not os.path.isdir(folder):
            continue
        for name in sorted(os.listdir(folder)):
            if patient in name:
                return folder + name
    return None

In [3]:
def in_between_vid(avi,files):
    '''Tell whether an avi number falls inside the range named by an annotation file.

    avi   -- avi file name whose first four characters are the video number,
             e.g. '0655.avi'; one pair of surrounding double quotes is tolerated
    files -- annotation file name of the form '<patient>_<dir>_<first>-<last>_<who>.txt',
             e.g. 'EC137_44b461c7_0654-0658_DT.txt'

    Returns True when first <= avi number <= last, otherwise False. Names that
    do not start with 'E' (for example '._*' or '.DS_Store' entries) and names
    that do not follow the pattern above are never a match and give False.

    Raises ValueError when the avi number itself cannot be parsed.
    '''
    # Strip the surrounding quotes if the name arrived quoted.
    if avi.startswith('"'):
        avi = avi[1:-1]
    avi_num = int(avi[0:4])

    # Only names starting with 'E' are annotation files; everything else is skipped.
    if not files.startswith("E"):
        return False

    try:
        # Third underscore-separated field holds '<first>-<last>'.
        first, last = (int(part) for part in files.split("_")[2].split("-")[:2])
    except (IndexError, ValueError):
        # Starts with 'E' but is not '<patient>_<dir>_<first>-<last>_...'.
        return False

    return first <= avi_num <= last

In [4]:
def count_annot(file, item):
    '''Count how often an annotation label occurs in one annotation file.

    file -- path to a tab-separated annotation file without a header row
    item -- label to count, for example 'laughing'

    The labels are read from column 8 when the file has nine columns and
    from column 7 otherwise. Returns the number of rows carrying ``item``,
    or 0 when the label never appears.
    '''
    table = pd.read_csv(file, sep='\t', header = None)
    # Nine-column files keep the label one column further right.
    label_col = 8 if table.shape[1] == 9 else 7
    tally = table[label_col].value_counts()
    return tally[item] if item in tally else 0

In [5]:
def remove_dup(files):
    '''Keep one file per 24-character name prefix.

    Two names are duplicates when their first 24 characters are equal
    (for names like 'EC137_44b461c7_0654-0658_DT.txt' that is everything
    before the annotator suffix). For each group of duplicates only the
    last name, in the order given, is kept.

    files -- sequence of file names; it is iterated twice

    Returns a new list with the surviving names in their original order.
    '''
    # How many names with each prefix are still to come.
    remaining = {}
    for f in files:
        prefix = f[0:24]
        remaining[prefix] = remaining.get(prefix, 0) + 1

    new_files = []
    for f in files:
        prefix = f[0:24]
        if remaining[prefix] > 1:
            # A later name shares this prefix: drop this one.
            remaining[prefix] -= 1
        else:
            new_files.append(f)

    return new_files

## Locate IMS files

In [19]:
# Folder with one sub-folder of annotation text files per patient.
dir_path = '/Users/alia/Documents/dura/userdata/ashafi/annotation_files_to_share/all_subjects_text_files/'
# Folder searched for the patient's raw IMS export.
IMS_path = '/Users/alia/Dropbox (UCSF Department of Neurological Surgery)/SUBNETS Dropbox/EMU data tracking/Raw_IMS'

## select patient you want to run
Patient = 'EC137'

## find IMS
IMS_scores = find_ims(Patient,IMS_path)
if IMS_scores is None:
    # find_ims returns None when no file name contains the patient id; stop
    # here with a clear message instead of handing None to pd.read_csv.
    raise FileNotFoundError('No IMS file found for ' + Patient + ' under ' + IMS_path)

## grab all IMS times
# 'datcreated_at' is the column used by older versions of the export;
# presumably newer files carry the timestamp in 'datlocal_ts' instead --
# swap the column name below for those (TODO confirm).
IMS_times = pd.read_csv(IMS_scores)['datcreated_at']

In [20]:
# Convert each UNIX time stamp to a 'YYYY-MM-DD HH:MM:SS' string.
# time.localtime interprets the stamp in the time zone of the machine running
# the notebook, so the resulting wall-clock times depend on where this is run.
time_list = [time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(j)) for j in IMS_times]

In [21]:
# Collect the raw stamps and their formatted counterparts in one frame,
# one row per IMS entry.
patient_df = pd.DataFrame({
    'IMS_times': IMS_times,
    'converted_times': time_list,
})

In [22]:
# Parse the formatted strings into datetimes, then derive the edges of the
# two-hour window centred on each IMS entry.
hour = timedelta(hours=1)
patient_df['converted_times'] = pd.to_datetime(patient_df['converted_times'])
for column, offset in (('one_hour_before', -hour), ('one_hour_after', hour)):
    patient_df[column] = patient_df['converted_times'] + offset

## Find the annotation times around IMS

### Connect to Google Drive

For instructions on setting up the service-account credentials used below, see:
https://www.twilio.com/blog/2017/02/an-easy-way-to-read-and-write-to-a-google-spreadsheet-in-python.html

In [23]:
# Scopes requested for the service account: the spreadsheets feed and Drive.
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']
# The key file is given as a relative path, so it is looked up in the current
# working directory. It holds credentials -- keep it out of version control.
creds = ServiceAccountCredentials.from_json_keyfile_name('client_secret.json', scope)
# Authorised gspread client used below to open the spreadsheet.
client = gspread.authorize(creds)

##### read google sheet

In [24]:
# Open the spreadsheet by its title, then pick the worksheet whose name is the
# patient id selected above.
sheet = client.open("annotated files to share")
worksheet = sheet.worksheet(Patient)

In [25]:
# Worksheet columns (1-indexed, as passed to col_values):
# col 4 = avi name
# col 5 = avi number
# col 7 = date
# col 9 = time
# Read each column once up front: the loop below never modifies the sheet, and
# every col_values call is a separate request to the spreadsheet service.
all_dates = worksheet.col_values(7)
all_times = worksheet.col_values(9)
all_avi_names = worksheet.col_values(4)
all_avi_nums = worksheet.col_values(5)

hourly_vid = []
for window_start, window_end in zip(patient_df['one_hour_before'], patient_df['one_hour_after']):
    # The window usually lies inside one calendar day, but it covers two when
    # the IMS entry is within an hour of midnight; rows are checked against
    # every day the window touches.
    window_days = sorted({window_start.date(), window_end.date()})

    videos = []
    for d in range(len(all_dates)):
        # skip blank cells and the header cell
        if all_dates[d] == "" or all_dates[d] == "Date":
            continue
        date_time = all_dates[d].split("/")
        row_month = int(date_time[0])
        for day in window_days:
            # only month and day are compared; the day field is parsed only
            # once the month matches
            if day.month != row_month or day.day != int(date_time[1]):
                continue
            vid_time = dt.datetime.strptime(all_times[d],"%H:%M:%S").time()
            if window_start <= dt.datetime.combine(day, vid_time) <= window_end:
                videos.append([all_avi_names[d],all_avi_nums[d]])
            break

    hourly_vid.append(videos)

patient_df['videos'] = hourly_vid

## now the df has all the needed videos in the 'videos' column

### Go through and find all the videos in the directories

In [26]:
# Show the matched [avi name, avi number] pairs; one inner list per row of patient_df.
hourly_vid

[[],
 [['44b461c7', '0655.avi'],
  ['44b461c7', '0656.avi'],
  ['44b461c7', '0657.avi'],
  ['44b461c7', '0658.avi'],
  ['44b461c7', '0659.avi'],
  ['44b461c7', '0660.avi'],
  ['44b461c7', '0661.avi'],
  ['44b461c7', '0662.avi'],
  ['44b461c7', '0663.avi'],
  ['44b461c7', '0664.avi'],
  ['44b461c7', '0665.avi'],
  ['44b461c7', '0666.avi'],
  ['44b461c7', '0667.avi'],
  ['44b461c7', '0668.avi'],
  ['44b461c7', '0669.avi'],
  ['44b461c7', '0670.avi'],
  ['44b461c7', '0671.avi'],
  ['44b461c7', '0672.avi'],
  ['44b461c7', '0673.avi'],
  ['44b461c7', '0674.avi'],
  ['44b461c7', '0675.avi'],
  ['44b461c7', '0676.avi'],
  ['44b461c7', '0677.avi'],
  ['44b461c7', '0678.avi'],
  ['44b461c7', '0679.avi'],
  ['44b461c7', '0680.avi'],
  ['44b461c7', '0681.avi'],
  ['44b461c7', '0682.avi'],
  ['44b461c7', '0683.avi'],
  ['44b461c7', '0684.avi'],
  ['44b461c7', '0685.avi'],
  ['44b461c7', '0686.avi'],
  ['44b461c7', '0687.avi'],
  ['44b461c7', '0688.avi'],
  ['44b461c7', '0689.avi'],
  ['44b461c7', 

In [28]:
# One list per annotation label; each receives one total per IMS entry.
smiling, pos, neg, crying, pain, laughing = [], [], [], [], [], []
# Label as written in the annotation files -> list collecting its totals.
label_totals = (
    ('smiling', smiling),
    ('positive affect', pos),
    ('negative affect', neg),
    ('crying', crying),
    ('pain-general', pain),
    ('laughing', laughing),
)
patient_dir = dir_path + Patient + '/'

for window_videos in patient_df['videos']:
    # Gather every annotation file whose avi range covers a video in the window.
    matched = []
    for session, avi in window_videos:
        session_dir = patient_dir + session
        if os.path.exists(session_dir):
            matched.extend(name for name in os.listdir(session_dir)
                           if in_between_vid(avi, name))

    # Several videos map to the same file, and a range may have been annotated
    # more than once: reduce to one file per range.
    uniq_files = remove_dup(np.unique(matched))
    print(uniq_files)

    running = dict.fromkeys([label for label, totals in label_totals], 0)
    for annot in uniq_files:
        # The second underscore-separated field of the name is its folder.
        annot_path = patient_dir + annot.split("_")[1] + '/' + annot
        for label in running:
            running[label] += count_annot(annot_path, label)
        print(running['smiling'])

    for label, totals in label_totals:
        totals.append(running[label])

[]
['EC137_44b461c7_0654-0658_DT.txt', 'EC137_44b461c7_0659-0663_JG.txt', 'EC137_44b461c7_0664-0668_NK.txt', 'EC137_44b461c7_0669-0673_VC.txt', 'EC137_44b461c7_0674-0678_JG.txt', 'EC137_44b461c7_0679-0683_NK.txt', 'EC137_44b461c7_0684-0688_DT.txt', 'EC137_44b461c7_0689-0693_VC.txt', 'EC137_44b461c7_0694-0698_NK.txt', 'EC137_44b461c7_0699-0703_JG.txt', 'EC137_44b461c7_0704-0708_DT.txt', 'EC137_44b461c7_0709-0714_VC.txt']
1
2
2
2
2
3
4
4
4
4
5
6
['EC137_1ded1630_0557-0561_DT.txt', 'EC137_1ded1630_0562-0566_JG.txt', 'EC137_1ded1630_0567-0571_NK.txt', 'EC137_1ded1630_0572-0576_VC.txt', 'EC137_1ded1630_0577-0581_JG.txt', 'EC137_1ded1630_0582-0586_NK.txt', 'EC137_1ded1630_0587-0591_DT.txt', 'EC137_1ded1630_0592-0596_VC.txt', 'EC137_1ded1630_0597-0601_NK.txt', 'EC137_1ded1630_0602-0606_JG.txt', 'EC137_1ded1630_0607-0611_DT.txt', 'EC137_1ded1630_0612-0616_VC.txt']
0
0
0
0
0
0
0
0
0
0
0
0
['EC137_2fff85c8_0110-0114_VC.txt', 'EC137_2fff85c8_0115-0119_NK.txt', 'EC137_2fff85c8_0120-0124_JG.txt', '

3
3
4
8
10
11
24
41
50
['EC137_ed39b6a1_0075-0079_VC.txt', 'EC137_ed39b6a1_0080-0084_NK.txt', 'EC137_ed39b6a1_0085-0089_DT.txt', 'EC137_ed39b6a1_0090-0094_JG.txt', 'EC137_ed39b6a1_0095-0099_VC.txt', 'EC137_ed39b6a1_0100-0104_NK.txt', 'EC137_ed39b6a1_0105-0109_DT.txt', 'EC137_ed39b6a1_0110-0114_JG.txt', 'EC137_ed39b6a1_0115-0119_VC.txt', 'EC137_ed39b6a1_0120-0124_NK.txt', 'EC137_ed39b6a1_0125-0129_DT.txt', 'EC137_ed39b6a1_0130-0136_JG.txt']
0
1
1
5
5
5
5
5
5
5
5
11
['EC137_05784fda_0538-0542_VC.txt', 'EC137_05784fda_0543-0547_NK.txt', 'EC137_05784fda_0548-0552_DT.txt', 'EC137_05784fda_0553-0557_JG.txt', 'EC137_05784fda_0558-0562_VC.txt', 'EC137_05784fda_0563-0567_NK.txt', 'EC137_05784fda_0568-0572_DT.txt', 'EC137_05784fda_0573-0577_JG.txt', 'EC137_05784fda_0578-0582_VC.txt', 'EC137_05784fda_0583-0587_NK.txt', 'EC137_05784fda_0588-0592_DT.txt', 'EC137_05784fda_0593-0597_JG.txt']
0
0
6
6
7
7
7
7
7
7
7
10
['EC137_05784fda_0715-0719_NK.txt', 'EC137_2efa2b4d_0000-0004_DT.txt', 'EC137_2efa2b4

In [29]:
# Attach the per-entry totals to the frame, one column per label.
for column, totals in (
    ('crying', crying),
    ('smiling', smiling),
    ('positive', pos),
    ('negative', neg),
    ('pain', pain),
    ('laughing', laughing),
):
    patient_df[column] = totals

In [30]:
# Show the frame with the time window, matched videos and label totals per IMS entry.
patient_df

Unnamed: 0,IMS_times,converted_times,one_hour_before,one_hour_after,videos,crying,smiling,positive,negative,pain,laughing
0,1480446787,2016-11-29 11:13:07,2016-11-29 10:13:07,2016-11-29 12:13:07,[],0,0,0,0,0,0
1,1480527742,2016-11-30 09:42:22,2016-11-30 08:42:22,2016-11-30 10:42:22,"[[44b461c7, 0655.avi], [44b461c7, 0656.avi], [...",0,6,0,2,3,0
2,1480602438,2016-12-01 06:27:18,2016-12-01 05:27:18,2016-12-01 07:27:18,"[[1ded1630, 0557.avi], [1ded1630, 0558.avi], [...",0,0,0,0,1,0
3,1480635724,2016-12-01 15:42:04,2016-12-01 14:42:04,2016-12-01 16:42:04,"[[2fff85c8, 0114.avi], [2fff85c8, 0115.avi], [...",0,0,0,0,0,0
4,1480710576,2016-12-02 12:29:36,2016-12-02 11:29:36,2016-12-02 13:29:36,"[[bf0dd416, 0017.avi], [bf0dd416, 0018.avi], [...",0,0,0,0,1,0
5,1480797574,2016-12-03 12:39:34,2016-12-03 11:39:34,2016-12-03 13:39:34,"[[8a8511eb, 0022.avi], [8a8511eb, 0023.avi], [...",0,2,0,0,1,0
6,1480813680,2016-12-03 17:08:00,2016-12-03 16:08:00,2016-12-03 18:08:00,"[[8a8511eb, 0156.avi], [8a8511eb, 0157.avi], [...",0,19,0,0,0,0
7,1480889621,2016-12-04 14:13:41,2016-12-04 13:13:41,2016-12-04 15:13:41,"[[9d4045fa, 0068.avi], [9d4045fa, 0069.avi], [...",0,10,0,0,0,0
8,1480959501,2016-12-05 09:38:21,2016-12-05 08:38:21,2016-12-05 10:38:21,"[[9d4045fa, 0651.avi], [9d4045fa, 0652.avi], [...",0,0,0,2,2,0
9,1481044361,2016-12-06 09:12:41,2016-12-06 08:12:41,2016-12-06 10:12:41,"[[508e0dd5, 0637.avi], [508e0dd5, 0638.avi], [...",0,0,0,0,4,0


In [31]:
# Keep only the columns to export, in export order (window edges and the video
# lists are left out). reindex(columns=...) replaces the deprecated
# reindex_axis(..., axis=1), which was removed in pandas 1.0.
data = patient_df.reindex(columns=['IMS_times', 'converted_times','crying','smiling','positive','laughing','pain','negative'])

  """Entry point for launching an IPython kernel.


In [32]:
# Write the selected columns to '<Patient>_IMS.csv' in the annotation_csvs folder.
# The destination folder is a hard-coded absolute path; adjust it when running elsewhere.
data.to_csv('/Users/alia/Documents/Annotation/annotation_csvs/'+Patient+'_IMS.csv')