# Behavioral Slope
The Behavioral Slope, a core aspect of the SMART GPA project, examines student behavior patterns and their influence on academic performance. Using the comprehensive Student Life dataset, this analysis aims to uncover trends in behaviors such as study habits, social interactions, and sleep routines. By assessing shifts and breakpoints in these behaviors, the analysis seeks to identify potential correlations with changes in GPA. With data sourced from sensors and smartphone apps, this exploration offers insights into students' multifaceted daily lives. By linking behavioral dynamics with academic outcomes, the analysis contributes to a deeper understanding of the factors that affect student success.

Paper Link = [Smart GPA](https://studentlife.cs.dartmouth.edu/smartgpa.pdf)
Student Life Dataset = [Dataset](https://www.kaggle.com/datasets/dartweichen/student-life)



Upload your Kaggle API token (`kaggle.json`)


Download the token (Kaggle JSON) from your Kaggle account settings and upload it to the working directory:
https://www.kaggle.com/settings/account

In [None]:
#Downloading Dataset
# Requires kaggle.json (the Kaggle API token) in the current working directory.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
# Restrict the token file to owner read/write only.
!chmod 600 ~/.kaggle/kaggle.json
# Download the dartweichen/student-life dataset and unzip it in place.
!kaggle datasets download -d dartweichen/student-life --unzip

Downloading student-life.zip to /content
100% 390M/390M [00:18<00:00, 28.8MB/s]
100% 390M/390M [00:18<00:00, 22.0MB/s]


In [None]:
#Importing Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
import datetime

Creating A dataframe for Slopes

In [None]:
# Seed the per-student feature table from the grades file: one row per uid,
# indexed by uid, holding the overall GPA and the ' gpa 13s' column (stored
# as 'Spring_gpa'). Later cells add one slope column at a time.
df1 = pd.read_csv("/content/dataset/education/grades.csv")
uids = df1['uid'].tolist()
AllSlopes = pd.DataFrame(
  {
    'gpa_all': df1[' gpa all'].tolist(),
    'Spring_gpa': df1[' gpa 13s'].tolist(),
  },
  index=pd.Index(uids, name='uids'),
)

###Activity Slope

Heightened physical activity correlates with slope increase.

In [None]:
#Calculating Slopes for Whole Day
def linear_regression_activity(df):
  """Fit ' activity inference' against time over every row of *df*.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and an ' activity inference' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (inference units per second),
    or a (1, 1) NaN array when *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  # Vectorised epoch-second conversion; naive timestamps are treated as UTC,
  # which is what pandas' Timestamp.timestamp() does per element.
  seconds = (stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
  x = seconds.to_numpy().reshape(-1, 1)
  y = df[' activity inference'].to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
#Calculating Slopes during daytime
def linear_regression_activity_day(df):
  """Fit ' activity inference' against time for samples taken 09:00-17:59.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and an ' activity inference' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (inference units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(9, 17).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  # Vectorised epoch-second conversion (naive timestamps treated as UTC).
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df[' activity inference'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
#Calculating Slopes during nighttime
def linear_regression_activity_night(df):
  """Fit ' activity inference' against time for samples taken 00:00-08:59.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and an ' activity inference' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (inference units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(0, 8).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  # Vectorised epoch-second conversion (naive timestamps treated as UTC).
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df[' activity inference'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
#Calculating Slopes during evening
def linear_regression_activity_evening(df):
  """Fit ' activity inference' against time for samples taken 18:00-23:59.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and an ' activity inference' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (inference units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(18, 23).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  # Vectorised epoch-second conversion (naive timestamps treated as UTC).
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df[' activity inference'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
#Calculating Activity Slope for each student
# (column prefix, suffix of the whole-term column, fitting function).
# The whole-day fit stores its term slope as '<prefix>' alone, the windowed
# fits as '<prefix>Term'; every fit also gets '<prefix>Pre' / '<prefix>Post'.
_activity_fits = (
  ('SlopeActivityTerm', '', linear_regression_activity),
  ('SlopeActivityDay', 'Term', linear_regression_activity_day),
  ('SlopeActivityNight', 'Term', linear_regression_activity_night),
  ('SlopeActivityEvening', 'Term', linear_regression_activity_evening),
)
for uid in uids:
  file="/content/dataset/sensing/activity/activity_"+uid+".csv"
  try:
    #Data Preprocessing
    df = pd.read_csv(file)
    # Drop samples labelled 3 (presumably the "unknown" class — confirm
    # against the dataset documentation). Copy so the column assignment
    # below writes to an independent frame rather than a filtered view.
    df = df[df[' activity inference'] != 3].copy()
    # Replace the column outright; writing through df.loc[:, col] attempts an
    # in-place write into the numeric column on recent pandas versions.
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')
    #Spliting data into Pre and Post (first 45% of the rows vs. the rest)
    split_index = int(len(df) * 0.45)
    pre, post = df.iloc[:split_index], df.iloc[split_index:]
    #Calculating Slopes
    for prefix, term_suffix, fit in _activity_fits:
      AllSlopes.loc[uid, prefix + term_suffix] = fit(df)[0][0]
    for prefix, _suffix, fit in _activity_fits:
      AllSlopes.loc[uid, prefix + 'Pre'] = fit(pre)[0][0]
      AllSlopes.loc[uid, prefix + 'Post'] = fit(post)[0][0]
  except Exception as exc:
    # Best effort: keep going with the next student, but say why this one
    # was skipped instead of silently swallowing the error.
    print(f"Skipping remaining activity slopes for {uid}: {exc!r}")
    continue

### Audio

In [None]:
def linear_regression_audio(df):
  """Fit ' audio inference' against time over every row of *df*.

  The timestamps are always converted to float epoch seconds before fitting,
  so the slope has the same unit whether the caller passes raw epoch seconds
  or a column that was already parsed to datetimes.

  Args:
    df: DataFrame with a 'timestamp' column (epoch seconds, or datetimes)
      and an ' audio inference' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (inference units per second),
    or a (1, 1) NaN array when *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  stamps = df['timestamp']
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  seconds = (stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
  x = seconds.to_numpy().reshape(-1, 1)
  y = df[' audio inference'].to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_audio_day(df):
  """Fit ' audio inference' against time for samples taken 09:00-17:59.

  The regressor is float epoch seconds rather than a raw datetime64 array,
  which scikit-learn does not document as supported input.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and an ' audio inference' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (inference units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(9, 17).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df[' audio inference'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_audio_evening(df):
  """Fit ' audio inference' against time for samples taken 18:00-23:59.

  The regressor is float epoch seconds rather than a raw datetime64 array,
  which scikit-learn does not document as supported input.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and an ' audio inference' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (inference units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(18, 23).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df[' audio inference'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_


In [None]:
def linear_regression_audio_night(df):
  """Fit ' audio inference' against time for samples taken 00:00-08:59.

  The regressor is float epoch seconds rather than a raw datetime64 array,
  which scikit-learn does not document as supported input.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and an ' audio inference' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (inference units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(0, 8).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df[' audio inference'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
# (column prefix, suffix of the whole-term column, fitting function,
#  whether the fit is given parsed datetimes instead of raw epoch seconds).
_audio_fits = (
  ('SlopeAudioTerm', '', linear_regression_audio, False),
  ('SlopeAudioDay', 'Term', linear_regression_audio_day, True),
  ('SlopeAudioNight', 'Term', linear_regression_audio_night, True),
  ('SlopeAudioEvening', 'Term', linear_regression_audio_evening, True),
)
for uid in uids:
  file="/content/dataset/sensing/audio/audio_"+uid+".csv"
  try:
    #Data Preprocessing
    df = pd.read_csv(file)
    # Drop samples labelled 3 (presumably the "unknown" class — confirm
    # against the dataset documentation).
    df = df[df[' audio inference']!=3]
    # Keep two views of the same rows: `df` with raw epoch seconds for the
    # whole-term fit (term, Pre and Post alike, so all three use the same
    # time unit) and `dated` with parsed datetimes for the hour-of-day fits.
    dated = df.assign(timestamp=pd.to_datetime(df['timestamp'], unit='s'))
    #Spliting data into Pre and Post (first 45% of the rows vs. the rest)
    split_index = int(len(df) * 0.45)
    #Calculating Slopes
    for prefix, term_suffix, fit, wants_datetime in _audio_fits:
      frame = dated if wants_datetime else df
      AllSlopes.loc[uid, prefix + term_suffix] = fit(frame)[0][0]
    for prefix, _suffix, fit, wants_datetime in _audio_fits:
      frame = dated if wants_datetime else df
      AllSlopes.loc[uid, prefix + 'Pre'] = fit(frame.iloc[:split_index])[0][0]
      AllSlopes.loc[uid, prefix + 'Post'] = fit(frame.iloc[split_index:])[0][0]
  except Exception as exc:
    # Best effort: keep going with the next student, but say why this one
    # was skipped instead of silently swallowing the error.
    print(f"Skipping remaining audio slopes for {uid}: {exc!r}")
    continue

###Conversation Duration


In [None]:
def linear_regression_conversation_duration(df):
  """Fit conversation duration against start time over every row of *df*.

  Both axes are expressed in seconds, whether the caller passes raw epoch
  seconds or columns that were already parsed to datetimes.

  Args:
    df: DataFrame with 'start_timestamp' and ' end_timestamp' columns
      (epoch seconds, or datetimes).

  Returns:
    The fitted ``coef_`` as a (1, 1) array (seconds of duration per second
    of elapsed time), or a (1, 1) NaN array when *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)

  def as_datetime(col):
    # Accept raw epoch seconds as well as already-parsed datetimes.
    if pd.api.types.is_numeric_dtype(col):
      return pd.to_datetime(col, unit='s')
    return pd.to_datetime(col)

  start = as_datetime(df['start_timestamp'])
  end = as_datetime(df[' end_timestamp'])
  one_second = pd.Timedelta(seconds=1)
  x = ((start - pd.Timestamp("1970-01-01")) / one_second).to_numpy().reshape(-1, 1)
  y = ((end - start) / one_second).to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_conversation_duration_day(df):
  """Fit conversation duration against start time for conversations that
  started between 09:00 and 17:59.

  Start time and duration are converted to float seconds before fitting
  instead of passing datetime64 / timedelta64 arrays to scikit-learn.

  Args:
    df: DataFrame with 'start_timestamp' and ' end_timestamp' columns
      (datetimes, or epoch seconds).

  Returns:
    The fitted ``coef_`` as a (1, 1) array (seconds of duration per second
    of elapsed time), or a (1, 1) NaN array when the window has no rows.
  """
  def as_datetime(col):
    # Accept raw epoch seconds as well as already-parsed datetimes.
    if pd.api.types.is_numeric_dtype(col):
      return pd.to_datetime(col, unit='s')
    return pd.to_datetime(col)

  start = as_datetime(df['start_timestamp'])
  end = as_datetime(df[' end_timestamp'])
  keep = start.dt.hour.between(9, 17).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  one_second = pd.Timedelta(seconds=1)
  x = ((start - pd.Timestamp("1970-01-01")) / one_second).to_numpy()[keep].reshape(-1, 1)
  y = ((end - start) / one_second).to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_conversation_duration_night(df):
  """Fit conversation duration against start time for conversations that
  started between 00:00 and 08:59.

  Start time and duration are converted to float seconds before fitting
  instead of passing datetime64 / timedelta64 arrays to scikit-learn.

  Args:
    df: DataFrame with 'start_timestamp' and ' end_timestamp' columns
      (datetimes, or epoch seconds).

  Returns:
    The fitted ``coef_`` as a (1, 1) array (seconds of duration per second
    of elapsed time), or a (1, 1) NaN array when the window has no rows.
  """
  def as_datetime(col):
    # Accept raw epoch seconds as well as already-parsed datetimes.
    if pd.api.types.is_numeric_dtype(col):
      return pd.to_datetime(col, unit='s')
    return pd.to_datetime(col)

  start = as_datetime(df['start_timestamp'])
  end = as_datetime(df[' end_timestamp'])
  keep = start.dt.hour.between(0, 8).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  one_second = pd.Timedelta(seconds=1)
  x = ((start - pd.Timestamp("1970-01-01")) / one_second).to_numpy()[keep].reshape(-1, 1)
  y = ((end - start) / one_second).to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_conversation_duration_evening(df):
  """Fit conversation duration against start time for conversations that
  started between 18:00 and 23:59.

  Start time and duration are converted to float seconds before fitting
  instead of passing datetime64 / timedelta64 arrays to scikit-learn.

  Args:
    df: DataFrame with 'start_timestamp' and ' end_timestamp' columns
      (datetimes, or epoch seconds).

  Returns:
    The fitted ``coef_`` as a (1, 1) array (seconds of duration per second
    of elapsed time), or a (1, 1) NaN array when the window has no rows.
  """
  def as_datetime(col):
    # Accept raw epoch seconds as well as already-parsed datetimes.
    if pd.api.types.is_numeric_dtype(col):
      return pd.to_datetime(col, unit='s')
    return pd.to_datetime(col)

  start = as_datetime(df['start_timestamp'])
  end = as_datetime(df[' end_timestamp'])
  keep = start.dt.hour.between(18, 23).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  one_second = pd.Timedelta(seconds=1)
  x = ((start - pd.Timestamp("1970-01-01")) / one_second).to_numpy()[keep].reshape(-1, 1)
  y = ((end - start) / one_second).to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
# (column prefix, suffix of the whole-term column, fitting function,
#  whether the fit is given parsed datetimes instead of raw epoch seconds).
_condur_fits = (
  ('SlopeConDurTerm', '', linear_regression_conversation_duration, False),
  ('SlopeConDurDay', 'Term', linear_regression_conversation_duration_day, True),
  ('SlopeConDurNight', 'Term', linear_regression_conversation_duration_night, True),
  ('SlopeConDurEvening', 'Term', linear_regression_conversation_duration_evening, True),
)
for uid in uids:
  file="/content/dataset/sensing/conversation/conversation_"+uid+".csv"
  try:
    #Data Preprocessing
    df = pd.read_csv(file)
    # Keep two views of the same rows: `df` with raw epoch seconds for the
    # whole-term fit (term, Pre and Post alike) and `dated` with parsed
    # datetimes for the hour-of-day fits.
    dated = df.copy()
    dated['start_timestamp'] = pd.to_datetime(df['start_timestamp'], unit='s')
    dated[' end_timestamp'] = pd.to_datetime(df[' end_timestamp'], unit='s')
    #Spliting data into Pre and Post (first 45% of the rows vs. the rest)
    split_index = int(len(df) * 0.45)
    #Calculating Slopes
    for prefix, term_suffix, fit, wants_datetime in _condur_fits:
      frame = dated if wants_datetime else df
      AllSlopes.loc[uid, prefix + term_suffix] = fit(frame)[0][0]
    for prefix, _suffix, fit, wants_datetime in _condur_fits:
      frame = dated if wants_datetime else df
      AllSlopes.loc[uid, prefix + 'Pre'] = fit(frame.iloc[:split_index])[0][0]
      AllSlopes.loc[uid, prefix + 'Post'] = fit(frame.iloc[split_index:])[0][0]
  except Exception as exc:
    # Best effort: keep going with the next student, but say why this one
    # was skipped instead of silently swallowing the error.
    print(f"Skipping remaining conversation-duration slopes for {uid}: {exc!r}")
    continue

###Conversation frequency

In [None]:
def linear_regression_conversation_freq(df):
  """Fit the number of conversations per day against the calendar day.

  Days that have no row in *df* do not appear in the grouped counts, so
  they are not part of the fit.

  Args:
    df: DataFrame with one row per conversation and a 'date' column whose
      values provide ``toordinal()`` (e.g. ``datetime.date``).

  Returns:
    The fitted ``coef_`` as a (1, 1) array (change in daily count per
    day), or a (1, 1) NaN array when there is nothing to count.
  """
  # Count the number of rows for each date
  per_day = df.groupby('date').size()
  if per_day.empty:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  x = np.array([day.toordinal() for day in per_day.index]).reshape(-1, 1)
  y = per_day.to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_conversation_freq_day(df):
  """Fit the per-day count of conversations started 09:00-17:59 against
  the calendar day.

  Days without a conversation in the window do not appear in the grouped
  counts, so they are not part of the fit.

  Args:
    df: DataFrame with a datetime 'start_timestamp' column and a 'date'
      column whose values provide ``toordinal()``.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (change in daily count per
    day), or a (1, 1) NaN array when the window has no rows.
  """
  hours = df['start_timestamp'].dt.hour
  # Count the number of rows for each date
  per_day = df[(hours >= 9) & (hours < 18)].groupby('date').size()
  if per_day.empty:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  x = np.array([day.toordinal() for day in per_day.index]).reshape(-1, 1)
  y = per_day.to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_conversation_freq_evening(df):
  """Fit the per-day count of conversations started 18:00-23:59 against
  the calendar day.

  Days without a conversation in the window do not appear in the grouped
  counts, so they are not part of the fit.

  Args:
    df: DataFrame with a datetime 'start_timestamp' column and a 'date'
      column whose values provide ``toordinal()``.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (change in daily count per
    day), or a (1, 1) NaN array when the window has no rows.
  """
  hours = df['start_timestamp'].dt.hour
  # Count the number of rows for each date
  per_day = df[(hours >= 18) & (hours < 24)].groupby('date').size()
  if per_day.empty:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  x = np.array([day.toordinal() for day in per_day.index]).reshape(-1, 1)
  y = per_day.to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_conversation_freq_night(df):
  """Fit the per-day count of conversations started 00:00-08:59 against
  the calendar day.

  Days without a conversation in the window do not appear in the grouped
  counts, so they are not part of the fit.

  Args:
    df: DataFrame with a datetime 'start_timestamp' column and a 'date'
      column whose values provide ``toordinal()``.

  Returns:
    The fitted ``coef_`` as a (1, 1) array (change in daily count per
    day), or a (1, 1) NaN array when the window has no rows.
  """
  hours = df['start_timestamp'].dt.hour
  # Count the number of rows for each date
  per_day = df[(hours >= 0) & (hours < 9)].groupby('date').size()
  if per_day.empty:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  x = np.array([day.toordinal() for day in per_day.index]).reshape(-1, 1)
  y = per_day.to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
# (column prefix, suffix of the whole-term column, fitting function).
_confreq_fits = (
  ('SlopeConFreqTerm', '', linear_regression_conversation_freq),
  ('SlopeConFreqDay', 'Term', linear_regression_conversation_freq_day),
  ('SlopeConFreqNight', 'Term', linear_regression_conversation_freq_night),
  ('SlopeConFreqEvening', 'Term', linear_regression_conversation_freq_evening),
)
for uid in uids:
  file="/content/dataset/sensing/conversation/conversation_"+uid+".csv"
  try:
    #data preprocessing
    df = pd.read_csv(file)
    df['start_timestamp'] = pd.to_datetime(df['start_timestamp'], unit='s')
    df[' end_timestamp'] = pd.to_datetime(df[' end_timestamp'], unit='s')
    # Calendar day of each conversation, used as the grouping key.
    df['date'] = df['start_timestamp'].dt.date
    #Spliting data into Pre and Post (first 45% of the rows vs. the rest)
    split_index = int(len(df) * 0.45)
    pre, post = df.iloc[:split_index], df.iloc[split_index:]
    #slope
    for prefix, term_suffix, fit in _confreq_fits:
      AllSlopes.loc[uid, prefix + term_suffix] = fit(df)[0][0]
    for prefix, _suffix, fit in _confreq_fits:
      AllSlopes.loc[uid, prefix + 'Pre'] = fit(pre)[0][0]
      AllSlopes.loc[uid, prefix + 'Post'] = fit(post)[0][0]
  except Exception as exc:
    # Best effort: keep going with the next student, but say why this one
    # was skipped instead of silently swallowing the error.
    print(f"Skipping remaining conversation-frequency slopes for {uid}: {exc!r}")
    continue


###Indoor mobility - Wifi Location

In [None]:
def linear_regression_mobility(df):
  """Fit the 'location' indicator against time over every row of *df*.

  The timestamps are always converted to float epoch seconds before fitting,
  so the slope has the same unit whether the caller passes raw epoch seconds
  or a column that was already parsed to datetimes.

  Args:
    df: DataFrame with a 'timestamp' column (epoch seconds, or datetimes)
      and a numeric 'location' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array ('location' units per second),
    or a (1, 1) NaN array when *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  stamps = df['timestamp']
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  seconds = (stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
  x = seconds.to_numpy().reshape(-1, 1)
  y = df['location'].to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_mobility_day(df):
  """Fit the 'location' indicator against time for samples taken 09:00-17:59.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and a numeric 'location' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array ('location' units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(9, 17).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  # Vectorised epoch-second conversion (naive timestamps treated as UTC).
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df['location'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_mobility_evening(df):
  """Fit the 'location' indicator against time for samples taken 18:00-23:59.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and a numeric 'location' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array ('location' units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(18, 23).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  # Vectorised epoch-second conversion (naive timestamps treated as UTC).
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df['location'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
def linear_regression_mobility_night(df):
  """Fit the 'location' indicator against time for samples taken 00:00-08:59.

  Args:
    df: DataFrame with a 'timestamp' column (datetimes, or epoch seconds)
      and a numeric 'location' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array ('location' units per second),
    or a (1, 1) NaN array when no sample falls inside the window.
  """
  stamps = df['timestamp']
  # Accept raw epoch seconds as well as already-parsed datetimes.
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  keep = stamps.dt.hour.between(0, 8).to_numpy()
  if not keep.any():
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  # Vectorised epoch-second conversion (naive timestamps treated as UTC).
  seconds = ((stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)).to_numpy()
  x = seconds[keep].reshape(-1, 1)
  y = df['location'].to_numpy()[keep].reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
# (column prefix, suffix of the whole-term column, fitting function,
#  whether the fit is given parsed datetimes instead of raw epoch seconds).
_mobility_fits = (
  ('SlopeMobilityTerm', '', linear_regression_mobility, False),
  ('SlopeMobilityDay', 'Term', linear_regression_mobility_day, True),
  ('SlopeMobilityNight', 'Term', linear_regression_mobility_night, True),
  ('SlopeMobilityEvening', 'Term', linear_regression_mobility_evening, True),
)
for uid in uids:
  file="/content/dataset/sensing/wifi_location/wifi_location_"+uid+".csv"
  try:
    #data preprocessing
    df = pd.read_csv(file)
    # As parsed, the frame's index is used as the timestamp and the 'time'
    # column as the place label, while the parsed 'location' column is
    # discarded. NOTE(review): presumably the rows carry one more field
    # than the header — confirm against the raw CSV.
    df = df.reset_index()
    df = df.drop('location', axis=1)
    df = df.rename(columns={'index':'timestamp','time':'location'})
    # Encode the place label as an indicator: labels starting with "in"
    # become 1 and labels starting with "near" become 0. Both masks are
    # taken from the string labels before any integer is written back.
    labels = df['location'].astype(str)
    df['location'] = labels.astype(object)
    df.loc[labels.str[:2] == 'in', 'location'] = 1
    df.loc[labels.str[:4] == 'near', 'location'] = 0
    # Keep two views of the same rows: `df` with raw epoch seconds for the
    # whole-term fit (term, Pre and Post alike) and `dated` with parsed
    # datetimes for the hour-of-day fits.
    dated = df.assign(timestamp=pd.to_datetime(df['timestamp'], unit='s'))
    #Spliting data into Pre and Post (first 45% of the rows vs. the rest)
    split_index = int(len(df) * 0.45)
    #calculating Slopes
    for prefix, term_suffix, fit, wants_datetime in _mobility_fits:
      frame = dated if wants_datetime else df
      AllSlopes.loc[uid, prefix + term_suffix] = fit(frame)[0][0]
    for prefix, _suffix, fit, wants_datetime in _mobility_fits:
      frame = dated if wants_datetime else df
      AllSlopes.loc[uid, prefix + 'Pre'] = fit(frame.iloc[:split_index])[0][0]
      AllSlopes.loc[uid, prefix + 'Post'] = fit(frame.iloc[split_index:])[0][0]
  except Exception as exc:
    # Best effort: keep going with the next student, but say why this one
    # was skipped instead of silently swallowing the error.
    print(f"Skipping remaining mobility slopes for {uid}: {exc!r}")
    continue


###Sleep

In [None]:
def linear_regression_sleep(df):
  """Fit the sleep EMA 'hour' answer against the response time.

  The response times are converted to float epoch seconds before fitting
  instead of handing a datetime column to scikit-learn, so the slope is
  expressed per second.

  Args:
    df: DataFrame with a 'resp_time' column (datetimes, or epoch seconds)
      and an 'hour' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array ('hour' units per second), or
    a (1, 1) NaN array when *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  stamps = df['resp_time']
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  seconds = (stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
  x = seconds.to_numpy().reshape(-1, 1)
  y = df['hour'].to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
for uid in uids:
  file="/content/dataset/EMA/response/Sleep/Sleep_"+uid+".json"
  try:
    #data preprocessing
    df=pd.read_json(file)
    # Drop the bookkeeping columns wherever they exist rather than listing
    # the students whose files lack them, then discard incomplete answers.
    df=df.drop(columns=['null','location'], errors='ignore').dropna()
    #calculating Slopes
    AllSlopes.loc[uid,'SlopeSleepTerm']=linear_regression_sleep(df)[0][0]
    #Spliting data into Pre and Post: the first three weeks after the
    #student's earliest response vs. everything afterwards
    split_date = df['resp_time'].min() + pd.DateOffset(weeks=3)
    early = df[df['resp_time'] < split_date]
    late = df[df['resp_time'] >= split_date]
    #calculating Slopes (NaN when an interval holds no responses)
    for suffix, part in (('Pre', early), ('Post', late)):
      if len(part)==0:
        AllSlopes.loc[uid,'SlopeSleepTerm'+suffix]=np.nan
      else:
        AllSlopes.loc[uid,'SlopeSleepTerm'+suffix]=linear_regression_sleep(part)[0][0]
  except Exception as exc:
    # Best effort: keep going with the next student, but say why this one
    # was skipped instead of silently swallowing the error.
    print(f"Skipping remaining sleep slopes for {uid}: {exc!r}")
    continue

###Stress

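Stress levels, ordered from least to most stressed (the preprocessing below recodes the raw answers onto this scale): 1 - feeling great, 2 - feeling good, 3 - a little stressed, 4 - definitely stressed, 5 - stressed out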


In [None]:
def linear_regression_stress(df):
  """Fit the stress EMA 'level' against the response time.

  The response times are converted to float epoch seconds before fitting
  instead of handing a datetime column to scikit-learn, so the slope is
  expressed per second.

  Args:
    df: DataFrame with a 'resp_time' column (datetimes, or epoch seconds)
      and a 'level' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array ('level' units per second), or
    a (1, 1) NaN array when *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  stamps = df['resp_time']
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  seconds = (stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
  x = seconds.to_numpy().reshape(-1, 1)
  y = df['level'].to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
for uid in uids:
  file="/content/dataset/EMA/response/Stress/Stress_"+uid+".json"
  try:
    #Data preprocessing
    df=pd.read_json(file)
    # Drop the bookkeeping columns wherever they exist (a file without them
    # no longer aborts this student), then discard incomplete answers.
    df=df.drop(columns=["null",'location'], errors='ignore').dropna()
    # Recode the answers; only the 'level' column is touched so no other
    # column can be rewritten by accident.
    df['level']=df['level'].replace({1:3,2:4,3:5,4:2,5:1})
    #Calculating Slopes
    AllSlopes.loc[uid,'SlopeStressTerm']=linear_regression_stress(df)[0][0]
    #Spliting data into Pre and Post: the first three weeks after the
    #student's earliest response vs. everything afterwards
    split_date = df['resp_time'].min() + pd.DateOffset(weeks=3)
    early = df[df['resp_time'] < split_date]
    late = df[df['resp_time'] >= split_date]
    #Calculating Slopes (NaN when an interval holds no responses)
    for suffix, part in (('Pre', early), ('Post', late)):
      if len(part)==0:
        AllSlopes.loc[uid,'SlopeStressTerm'+suffix]=np.nan
      else:
        AllSlopes.loc[uid,'SlopeStressTerm'+suffix]=linear_regression_stress(part)[0][0]
  except Exception as exc:
    # Best effort: keep going with the next student, but say why this one
    # was skipped instead of silently swallowing the error.
    print(f"Skipping remaining stress slopes for {uid}: {exc!r}")
    continue

###PAM

In [None]:
def linear_regression_pam(df):
  """Fit the PAM 'picture_idx' answer against the response time.

  The response times are converted to float epoch seconds before fitting
  instead of handing a datetime column to scikit-learn, so the slope is
  expressed per second.

  Args:
    df: DataFrame with a 'resp_time' column (datetimes, or epoch seconds)
      and a 'picture_idx' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array ('picture_idx' units per
    second), or a (1, 1) NaN array when *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  stamps = df['resp_time']
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  seconds = (stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
  x = seconds.to_numpy().reshape(-1, 1)
  y = df['picture_idx'].to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
for uid in uids:
  file="/content/dataset/EMA/response/PAM/PAM_"+uid+".json"
  try:
    # Load the student's PAM responses and discard incomplete answers.
    df=pd.read_json(file)
    df=df.dropna()
    AllSlopes.loc[uid,'SlopePAMTerm']=linear_regression_pam(df)[0][0]
    #Spliting data into Pre and Post: the first three weeks after the
    #student's earliest response vs. everything afterwards
    split_date = df['resp_time'].min() + pd.DateOffset(weeks=3)
    early = df[df['resp_time'] < split_date]
    late = df[df['resp_time'] >= split_date]
    #Calculating Slopes (NaN when an interval holds no responses)
    for suffix, part in (('Pre', early), ('Post', late)):
      if len(part)==0:
        AllSlopes.loc[uid,'SlopePAM'+suffix]=np.nan
      else:
        AllSlopes.loc[uid,'SlopePAM'+suffix]=linear_regression_pam(part)[0][0]
  except Exception as exc:
    # Best effort: keep going with the next student, but say why this one
    # was skipped instead of silently swallowing the error.
    print(f"Skipping remaining PAM slopes for {uid}: {exc!r}")
    continue

###Class Hours

In [None]:
def linear_regression_class(df):
  """Fit the class EMA 'hours' answer against the response time.

  The response times are converted to float epoch seconds before fitting
  instead of handing a datetime column to scikit-learn, so the slope is
  expressed per second.

  Args:
    df: DataFrame with a 'resp_time' column (datetimes, or epoch seconds)
      and an 'hours' column.

  Returns:
    The fitted ``coef_`` as a (1, 1) array ('hours' units per second), or
    a (1, 1) NaN array when *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  stamps = df['resp_time']
  if pd.api.types.is_numeric_dtype(stamps):
    stamps = pd.to_datetime(stamps, unit='s')
  else:
    stamps = pd.to_datetime(stamps)
  seconds = (stamps - pd.Timestamp("1970-01-01")) / pd.Timedelta(seconds=1)
  x = seconds.to_numpy().reshape(-1, 1)
  y = df['hours'].to_numpy().reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
for uid in uids:
  file="/content/dataset/EMA/response/Class/Class_"+uid+".json"
  # Same best-effort policy as the other per-student loops: a missing or
  # unusable file skips that student instead of aborting the whole cell.
  try:
    df=pd.read_json(file)
    # Drop the unused columns wherever they exist rather than listing the
    # students whose files lack them, then discard incomplete answers.
    df=df.drop(columns=["null",'location',"due","course_id"], errors='ignore').dropna()
    AllSlopes.loc[uid,'SlopeClassTerm']=linear_regression_class(df)[0][0]
    #Spliting data into Pre and Post: the first three weeks after the
    #student's earliest response vs. everything afterwards
    split_date = df['resp_time'].min() + pd.DateOffset(weeks=3)
    early = df[df['resp_time'] < split_date]
    late = df[df['resp_time'] >= split_date]
    #Calculating Slopes (NaN when an interval holds no responses)
    for suffix, part in (('Pre', early), ('Post', late)):
      if len(part)==0:
        AllSlopes.loc[uid,'SlopeClass'+suffix]=np.nan
      else:
        AllSlopes.loc[uid,'SlopeClass'+suffix]=linear_regression_class(part)[0][0]
  except Exception as exc:
    print(f"Skipping remaining class slopes for {uid}: {exc!r}")
    continue


###Phone Charge

In [None]:
def linear_regression_phonecharge(df):
  """Fit charging-session length ('end' - 'start') against the session start.

  Args:
    df: DataFrame with numeric 'start' and 'end' columns.

  Returns:
    The fitted ``coef_`` as a (1, 1) array, or a (1, 1) NaN array when
    *df* has no rows.
  """
  if len(df) == 0:
    # LinearRegression.fit raises on zero samples; report "no slope" instead.
    return np.full((1, 1), np.nan)
  begin = df['start']
  x = np.array(begin).reshape(-1, 1)
  y = np.array(df['end'] - begin).reshape(-1, 1)
  reg = LinearRegression()
  reg.fit(x, y)
  return reg.coef_

In [None]:
for uid in uids:
  file="/content/dataset/sensing/phonecharge/phonecharge_"+uid+".csv"
  # Same best-effort policy as the other per-student loops: a missing or
  # unusable file skips that student instead of aborting the whole cell.
  try:
    df=pd.read_csv(file)
    AllSlopes.loc[uid,'SlopePhonechargeTerm']=linear_regression_phonecharge(df)[0][0]
    #Spliting data into Pre and Post (first 45% of the rows vs. the rest)
    split_index=int(len(df)*0.45)
    #Calculating Slopes
    AllSlopes.loc[uid,'SlopePhonechargePre']=linear_regression_phonecharge(df.iloc[:split_index])[0][0]
    AllSlopes.loc[uid,'SlopePhonechargePost']=linear_regression_phonecharge(df.iloc[split_index:])[0][0]
  except Exception as exc:
    print(f"Skipping remaining phone-charge slopes for {uid}: {exc!r}")
    continue

###Panas

In [None]:
# Load the PANAS survey responses; this rebinds the notebook-wide `df`.
df=pd.read_csv("/content/dataset/survey/panas.csv")

In [None]:
# Items added up for each affect score. Several column names end with a
# space; they are spelled exactly as they are looked up in the survey frame.
positive_items = ["Interested", "Strong", "Enthusiastic", "Proud", "Alert", "Inspired", "Determined ", "Attentive", "Active "]
negative_items = ['Distressed', 'Upset', 'Guilty', 'Scared', 'Hostile ', 'Irritable', 'Nervous', 'Jittery', 'Afraid ']
for _, rows in df.iterrows():
  student = rows['uid']
  # Only students present in the grades table get scores.
  if student not in uids:
    continue
  phase = rows["type"]
  if phase == 'pre':
    suffix = 'Pre'
  elif phase == 'post':
    suffix = 'Post'
  else:
    continue
  AllSlopes.loc[student, 'PositiveScore' + suffix] = sum(rows[item] for item in positive_items)
  AllSlopes.loc[student, 'NegativeScore' + suffix] = sum(rows[item] for item in negative_items)

###PHQ9

In [None]:
# Load the PHQ-9 survey, discard the "Response" column and turn the answer
# labels into their numeric codes (0-3).
phq9_codes = {"Not at all":0,"Several days":1,"More than half the days":2,"Nearly every day":3}
df = pd.read_csv("/content/dataset/survey/PHQ-9.csv")
df = df.drop(columns=["Response"])
df = df.replace(phq9_codes)

In [None]:
# Sum the nine item columns (positions 2-10) of each survey row into the
# student's pre / post PHQ-9 score.
for _, rows in df.iterrows():
  student = rows['uid']
  # Only students present in the grades table get scores.
  if student not in uids:
    continue
  # Read the items from the row itself. Indexing the frame with the
  # iterrows() label as a position is only correct for a default
  # RangeIndex.
  score = sum(rows.iloc[2:11])
  if rows['type']=='pre':
    AllSlopes.loc[student,'PHQ9ScorePre']=score
  if rows['type']=='post':
    AllSlopes.loc[student,'PHQ9ScorePost']=score

###Loneliness

In [None]:
# Load the loneliness-scale survey and turn the answer labels into their
# numeric codes (0-3).
loneliness_codes = {"Never":0,"Rarely":1,"Sometimes":2,"Often":3}
df = pd.read_csv("/content/dataset/survey/LonelinessScale.csv")
df = df.replace(loneliness_codes)

In [None]:
# Sum the twenty item columns (positions 2-21) of each survey row into the
# student's pre / post loneliness score.
for _, rows in df.iterrows():
  student = rows['uid']
  # Only students present in the grades table get scores.
  if student not in uids:
    continue
  # Read the items from the row itself. Indexing the frame with the
  # iterrows() label as a position is only correct for a default
  # RangeIndex.
  score = sum(rows.iloc[2:22])
  if rows['type']=='pre':
    AllSlopes.loc[student,'LonelinessScorePre']=score
  if rows['type']=='post':
    AllSlopes.loc[student,'LonelinessScorePost']=score

###All Slopes

In [None]:
# Display the assembled per-student feature table.
AllSlopes

In [None]:
# Write the feature table to AllSlopes.csv, keeping the uid index as a column.
AllSlopes.to_csv("AllSlopes.csv",index=True)