# Fill In Missing Data

In [33]:
import os
import json

from requests import get
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

### Obtain Data aggregations

In [2]:
mifit = pd.read_csv('part2/mifit_data.csv')
mifit.head()

Unnamed: 0,date,distance,steps
0,2019-08-18,769.142504,2394.0
1,2019-08-19,1177.21661,3492.0
2,2019-08-20,1615.47721,4537.0
3,2019-08-21,613.885731,3314.0
4,2019-08-22,1609.174505,5723.0


In [3]:
na_data = mifit[mifit['distance']==0]
na_data.head()

Unnamed: 0,date,distance,steps
13,2019-08-31,0.0,1162.0
14,2019-09-01,0.0,510.0
15,2019-09-02,0.0,613.0
16,2019-09-03,0.0,371.0
20,2019-09-07,0.0,11929.0


In [4]:
train_data = mifit[~ (mifit['distance']==0)]
train_data.head()

Unnamed: 0,date,distance,steps
0,2019-08-18,769.142504,2394.0
1,2019-08-19,1177.21661,3492.0
2,2019-08-20,1615.47721,4537.0
3,2019-08-21,613.885731,3314.0
4,2019-08-22,1609.174505,5723.0


In [5]:
train_x = np.array(train_data['steps']).reshape(-1,1)
train_y = np.array(train_data['distance']).reshape(-1,1)

In [6]:
lr = LinearRegression()
lr.fit(train_x, train_y)
lr

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [7]:
na_x = np.array(na_data['steps']).reshape(-1,1)
na_y = lr.predict(na_x)

In [8]:
na_y.reshape(11)

array([ 688.80054713,  422.65946131,  464.7032218 ,  365.92079424,
       5083.8021898 , 3398.78623541, 1735.81264088, 1181.48810938,
       2497.90682374, 2154.20928654, 2088.89859063])

In [9]:
na_y = na_y.reshape(11)
na_y

array([ 688.80054713,  422.65946131,  464.7032218 ,  365.92079424,
       5083.8021898 , 3398.78623541, 1735.81264088, 1181.48810938,
       2497.90682374, 2154.20928654, 2088.89859063])

In [10]:
na_data['distance'] = na_y - 214.48161811040586
na_data.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,date,distance,steps
13,2019-08-31,474.318929,1162.0
14,2019-09-01,208.177843,510.0
15,2019-09-02,250.221604,613.0
16,2019-09-03,151.439176,371.0
20,2019-09-07,4869.320572,11929.0


In [11]:
mifit.loc[na_data.index, 'distance'] = na_y
mifit[(mifit['distance']==0)]

Unnamed: 0,date,distance,steps


In [12]:
mifit.to_csv('part2/fitness_data.csv', index=False)

In [13]:
mifit.loc[:, 'distance'] = np.round(mifit['distance'],4)
mifit.head()

Unnamed: 0,date,distance,steps
0,2019-08-18,769.1425,2394.0
1,2019-08-19,1177.2166,3492.0
2,2019-08-20,1615.4772,4537.0
3,2019-08-21,613.8857,3314.0
4,2019-08-22,1609.1745,5723.0


### Work On Daily Data

In [14]:
files_list = mifit['date'].tolist()
files_list = list(map(lambda x: x + '.csv', files_list))
files_list[:2]

['2019-08-18.csv', '2019-08-19.csv']

In [26]:
dfs_dict = {}
folder_name = 'part2/data/Takeout/Fit/Daily Aggregations'
for num,f_name in enumerate(os.listdir(folder_name)):
    if f_name in files_list:
        print(f_name)
        df = pd.read_csv(os.path.join(folder_name, f_name))
        df = df[['Start time', 'Distance (m)', 'Step count']].fillna(0)
        x = np.array(df['Step count']).reshape(-1,1)
        y = lr.predict(x)
        df.loc[:, 'Distance (m)'] = y - 214.48161811040586
        dfs_dict[f_name[:-4]] = df
        

2019-08-28.csv
2019-08-29.csv
2019-09-08.csv
2019-09-09.csv
2019-09-04.csv
2019-09-10.csv
2019-09-11.csv
2019-09-05.csv
2019-09-13.csv
2019-09-07.csv
2019-09-06.csv
2019-09-12.csv
2019-09-16.csv
2019-09-02.csv
2019-09-03.csv
2019-09-01.csv
2019-09-15.csv
2019-09-14.csv
2019-08-27.csv
2019-08-26.csv
2019-08-18.csv
2019-08-24.csv
2019-08-30.csv
2019-08-31.csv
2019-08-25.csv
2019-08-19.csv
2019-08-21.csv
2019-08-20.csv
2019-08-22.csv
2019-08-23.csv


In [30]:
unwound_list = []

for key in dfs_dict.keys():
    df = dfs_dict[key]
    start_times = df['Start time'].tolist()
    distances = df['Distance (m)'].tolist()
    steps = df['Step count'].tolist()
    unwound_list.append({     'date': key,
                         'start_times': start_times,
                         'distances': distances,
                         'steps': steps,
                         'total_distance' : mifit['distance'][(mifit['date']==key)].values[0],
                         'total_steps' : mifit['steps'][(mifit['date']==key)].values[0]
                        })

In [44]:
#with open(os.path.join('part2/daily_activies.json'), 'w') as f:
#    f.write(json.dump(unwound_list))

#json.dump(unwound_list, os.path.join('part2/daily_activies.json'))

AttributeError: 'str' object has no attribute 'write'

In [18]:
mifit['distance'][(mifit['date']=='2019-08-28')].values[0]

2127.054

In [28]:
d = unwound_dict['2019-08-28']
print(sum(d['distances']), sum(d['steps']), d['total_distance'], d['total_steps'])

1740.5300459104426 4264.0 2127.054 4264.0


In [None]:
todos = get('https://jsonplaceholder.typicode.com/todos/').json()
todos

In [42]:
with open(os.path.join('part2/todos.json'), 'w') as f:
    f.write(json.dumps(todos))