# Weight Loss

In [1]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import autoreload
import missingno as msno
import warnings
import csv
import os
import sys

from collections import defaultdict, Counter

from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVR, SVR 
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error 

%matplotlib inline
plt.rcParams.update({'font.size':16})

sb.set_style('darkgrid')
sb.set()

pd.set_option('display.max_rows', 1000)
pd.set_option('display.max_columns', 100)




In [2]:
df = pd.read_csv("D:\Open Classroom\Datasets\Steps Count for Fitness Journey Dataset\Steps Export_ 3-17-21to9-23-22.csv")
df.head()

Unnamed: 0,Date,Step Count,Distance Travelled,Active Time,Flights Climbed,Goal,Percentage of Goal Completed,Calories
0,3/17/2021,1500,1.02 km,23:14 min,3,10000,15%,64
1,3/18/2021,3165,2.23 km,44:56 min,2,10000,32%,134
2,3/19/2021,1494,1.08 km,24:51 min,0,10000,15%,68
3,3/20/2021,1808,1.27 km,30:04 min,1,10000,18%,81
4,3/21/2021,2667,1.78 km,30:57 min,3,10000,27%,103


In [3]:
df.shape

(526, 8)

In [4]:
df.isnull().sum()

Date                            0
Step Count                      0
Distance Travelled              0
Active Time                     0
Flights Climbed                 0
Goal                            0
Percentage of Goal Completed    0
Calories                        0
dtype: int64

In [5]:
df['Date'] = pd.to_datetime(df['Date'])

In [6]:
df['unit'] = df['Distance Travelled'].str.split(' ', expand = True).get(1)

In [7]:
df.head()

Unnamed: 0,Date,Step Count,Distance Travelled,Active Time,Flights Climbed,Goal,Percentage of Goal Completed,Calories,unit
0,2021-03-17,1500,1.02 km,23:14 min,3,10000,15%,64,km
1,2021-03-18,3165,2.23 km,44:56 min,2,10000,32%,134,km
2,2021-03-19,1494,1.08 km,24:51 min,0,10000,15%,68,km
3,2021-03-20,1808,1.27 km,30:04 min,1,10000,18%,81,km
4,2021-03-21,2667,1.78 km,30:57 min,3,10000,27%,103,km


In [8]:
df['Percentage of Goal Completed'] = (df['Step Count'] * 100) /df['Goal']

In [9]:
df.head()

Unnamed: 0,Date,Step Count,Distance Travelled,Active Time,Flights Climbed,Goal,Percentage of Goal Completed,Calories,unit
0,2021-03-17,1500,1.02 km,23:14 min,3,10000,15.0,64,km
1,2021-03-18,3165,2.23 km,44:56 min,2,10000,31.65,134,km
2,2021-03-19,1494,1.08 km,24:51 min,0,10000,14.94,68,km
3,2021-03-20,1808,1.27 km,30:04 min,1,10000,18.08,81,km
4,2021-03-21,2667,1.78 km,30:57 min,3,10000,26.67,103,km


In [10]:
df['Distance Travelled'] = df['Distance Travelled'].str.replace('km', '')

In [11]:
df.head()

Unnamed: 0,Date,Step Count,Distance Travelled,Active Time,Flights Climbed,Goal,Percentage of Goal Completed,Calories,unit
0,2021-03-17,1500,1.02,23:14 min,3,10000,15.0,64,km
1,2021-03-18,3165,2.23,44:56 min,2,10000,31.65,134,km
2,2021-03-19,1494,1.08,24:51 min,0,10000,14.94,68,km
3,2021-03-20,1808,1.27,30:04 min,1,10000,18.08,81,km
4,2021-03-21,2667,1.78,30:57 min,3,10000,26.67,103,km


In [12]:
df['Distance Travelled'] = pd.to_numeric(df['Distance Travelled'])

In [13]:
df['Distance Travelled'].dtypes

dtype('float64')

In [14]:
df['Goal'].dtypes

dtype('int64')

In [15]:
df['Percentage of Goal Completed'].dtypes

dtype('float64')

In [16]:
df[['Active Time', 'Time units']] = df['Active Time'].str.split(' ', expand=True)#.get(1)
df.head()

Unnamed: 0,Date,Step Count,Distance Travelled,Active Time,Flights Climbed,Goal,Percentage of Goal Completed,Calories,unit,Time units
0,2021-03-17,1500,1.02,23:14,3,10000,15.0,64,km,min
1,2021-03-18,3165,2.23,44:56,2,10000,31.65,134,km,min
2,2021-03-19,1494,1.08,24:51,0,10000,14.94,68,km,min
3,2021-03-20,1808,1.27,30:04,1,10000,18.08,81,km,min
4,2021-03-21,2667,1.78,30:57,3,10000,26.67,103,km,min


In [17]:
df['Active Time'] = df['Active Time'].str.replace(':', '.')
df['Active Time'] = pd.to_numeric(df['Active Time'])

In [18]:
df['Active Time'].dtypes

dtype('float64')

In [19]:
df.head()

Unnamed: 0,Date,Step Count,Distance Travelled,Active Time,Flights Climbed,Goal,Percentage of Goal Completed,Calories,unit,Time units
0,2021-03-17,1500,1.02,23.14,3,10000,15.0,64,km,min
1,2021-03-18,3165,2.23,44.56,2,10000,31.65,134,km,min
2,2021-03-19,1494,1.08,24.51,0,10000,14.94,68,km,min
3,2021-03-20,1808,1.27,30.04,1,10000,18.08,81,km,min
4,2021-03-21,2667,1.78,30.57,3,10000,26.67,103,km,min


In [20]:
df['Active Time'].dtypes

dtype('float64')

In [21]:
# df['Active Time in Mins'] = np.where(df['Active Time'] * 60, df['Time units'] == 'hr')
# def create_minutes(data['Active Time']):
for i in df['Active Time']:
    if any(df['Time units'] == 'hr'):
        df['Active time in mins'] = df['Active Time'] * 60
    elif any(df['Time units'] == 'min'):
        df['Active time in mins'] = df['Active Time']


In [22]:
df.head(30)

Unnamed: 0,Date,Step Count,Distance Travelled,Active Time,Flights Climbed,Goal,Percentage of Goal Completed,Calories,unit,Time units,Active time in mins
0,2021-03-17,1500,1.02,23.14,3,10000,15.0,64,km,min,1388.4
1,2021-03-18,3165,2.23,44.56,2,10000,31.65,134,km,min,2673.6
2,2021-03-19,1494,1.08,24.51,0,10000,14.94,68,km,min,1470.6
3,2021-03-20,1808,1.27,30.04,1,10000,18.08,81,km,min,1802.4
4,2021-03-21,2667,1.78,30.57,3,10000,26.67,103,km,min,1834.2
5,2021-03-22,2368,1.69,37.25,1,10000,23.68,105,km,min,2235.0
6,2021-03-23,3595,2.68,54.1,2,10000,35.95,161,km,min,3246.0
7,2021-03-24,779,0.58,13.08,2,10000,7.79,36,km,min,784.8
8,2021-03-25,2566,1.87,38.38,1,10000,25.66,113,km,min,2302.8
9,2021-03-26,1687,1.24,25.42,1,10000,16.87,75,km,min,1525.2


In [23]:
#df['Active Time'] = df['Active Time'].str.replace(' hr', '*60')

In [24]:
#df['Active Time'] = df['Active Time'].str.replace(':','.')

In [25]:
#df['Active Time'] = df['Active Time'].str.replace('min', '')

In [26]:
#df['Active Time'] = df['Active Time'].astype(float)

In [27]:
df.head(30)

Unnamed: 0,Date,Step Count,Distance Travelled,Active Time,Flights Climbed,Goal,Percentage of Goal Completed,Calories,unit,Time units,Active time in mins
0,2021-03-17,1500,1.02,23.14,3,10000,15.0,64,km,min,1388.4
1,2021-03-18,3165,2.23,44.56,2,10000,31.65,134,km,min,2673.6
2,2021-03-19,1494,1.08,24.51,0,10000,14.94,68,km,min,1470.6
3,2021-03-20,1808,1.27,30.04,1,10000,18.08,81,km,min,1802.4
4,2021-03-21,2667,1.78,30.57,3,10000,26.67,103,km,min,1834.2
5,2021-03-22,2368,1.69,37.25,1,10000,23.68,105,km,min,2235.0
6,2021-03-23,3595,2.68,54.1,2,10000,35.95,161,km,min,3246.0
7,2021-03-24,779,0.58,13.08,2,10000,7.79,36,km,min,784.8
8,2021-03-25,2566,1.87,38.38,1,10000,25.66,113,km,min,2302.8
9,2021-03-26,1687,1.24,25.42,1,10000,16.87,75,km,min,1525.2
