# Predict Workout

Predict Workout Day and distance

In [None]:
#!pip install scikit-learn

In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Directory holding the input CSV files. The trailing slash matters because the
# file name is appended by plain string concatenation.
filepath = "data/"

def loadCleanData():
    """Read the cleaned activities CSV and parse its date columns.

    Returns:
        DataFrame read from ``activities_clean.csv`` under ``filepath``, with
        ``Week`` converted to datetimes and ``Activity Date`` converted to
        datetimes shifted forward by three hours.
    """
    activities = pd.read_csv(filepath + 'activities_clean.csv')
    activities['Week'] = pd.to_datetime(activities['Week'])
    # NOTE(review): the +3h shift looks like a UTC -> local time correction — confirm.
    parsed_dates = pd.to_datetime(activities['Activity Date'])
    activities['Activity Date'] = parsed_dates + timedelta(hours=3)
    return activities

# Load the cleaned activities once; the cells below add feature columns to this frame.
data = loadCleanData()
# Preview the first rows to sanity-check the parsed columns.
data.head()

### Feature Extraction

In [None]:
# Calendar year of each activity, taken with the vectorized datetime accessor
# instead of a Python-level loop over every timestamp.
data['year'] = data['Activity Date'].dt.year

In [None]:
# Month number (1-12) of each activity, taken with the vectorized datetime accessor.
data['month'] = data['Activity Date'].dt.month

In [None]:
# Day of the month of each activity, taken with the vectorized datetime accessor.
data['day'] = data['Activity Date'].dt.day

In [None]:
# Hour of day (0-23) at which each activity started, after the +3h shift applied on load.
data['hour'] = data['Activity Date'].dt.hour

In [None]:
# Minute within the hour of each activity, taken with the vectorized datetime accessor.
data['minute'] = data['Activity Date'].dt.minute

In [None]:
# Day of the week of each activity (Monday=0 ... Sunday=6), vectorized.
data['dayOfWeek'] = data['Activity Date'].dt.dayofweek

In [None]:
# 1 for Saturday/Sunday activities (dayofweek 5 or 6), 0 for weekdays.
data['isWeekend'] = (data['Activity Date'].dt.dayofweek >= 5).astype(int)

### Transform Data

In [None]:
# Encode the activity-type strings as integer class labels so the models can use them.
from sklearn import preprocessing

le = preprocessing.LabelEncoder().fit(data['Activity Type'])
# Show the distinct activity types the encoder learned.
le.classes_

In [None]:
# Integer code assigned to each activity type, in the same order as le.classes_.
vals = le.transform(le.classes_)
vals

In [None]:
# Store the encoded activity type as a numeric column next to the original strings.
data['rideType'] = le.transform(data['Activity Type'])
data['rideType']

### Prepare features

In [None]:
# Feature matrix: one row per activity, columns in exactly the order listed here.
X = data[
    ['month', 'day', 'hour', 'minute', 'dayOfWeek', 'isWeekend']
].to_numpy()
X

### Prepare Prediction Targets

In [None]:
# Target vector holding the Distance column as a NumPy array.
Y_distance = data['Distance'].to_numpy()
Y_distance

In [None]:
# Target vector holding the label-encoded ride type as a NumPy array.
Y_rideType = data['rideType'].to_numpy()
Y_rideType

In [None]:
# Two-column target array: Distance first, encoded ride type second.
Y = data[['Distance', 'rideType']].to_numpy()
Y

## Feature Evaluation

In [None]:
# Ride distance by hour of day (one point per activity).
data.plot(kind='scatter', x='hour', y='Distance', xticks=data['hour'], figsize=(14,8))

In [None]:
# Ride distance by day of week (one point per activity).
data.plot(kind='scatter', x='dayOfWeek', y='Distance', xticks=data['dayOfWeek'], figsize=(14,8))

In [None]:
# Ride distance for weekday (0) versus weekend (1) activities.
data.plot(kind='scatter', x='isWeekend', y='Distance', xticks=data['isWeekend'], figsize=(14,8))

## Training Score

In [None]:
from sklearn.model_selection import TimeSeriesSplit
from sklearn import linear_model
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

# Spot-check several regressors on the distance target.
# Distance is the continuous quantity to regress; the label-encoded ride type is
# a class id (it gets its own classifier further down), and SVR only accepts a
# one-dimensional target, so the two-column Y cannot be used here.
# Stochastic models get a fixed seed so repeated runs give the same scores.
models = [
    ('LR', linear_model.LinearRegression()),
    ('NN', MLPRegressor(solver='lbfgs', random_state=0)),  # neural network
    ('KNN', KNeighborsRegressor()),
    # Ensemble method - collection of many decision trees
    ('RF', RandomForestRegressor(n_estimators=10, random_state=0)),
    ('SVR', SVR(gamma='auto')),  # no kernel given, so the library default is used
]

# Time-series cross validation: every fold trains on earlier rows and validates
# on later ones. The splitter does not depend on the model, so build it once.
# NOTE(review): this presumes the rows are in chronological order — confirm.
tscv = TimeSeriesSplit(n_splits=10)

# Evaluate each model in turn
results = []
names = []
for name, model in models:
    cv_results = cross_val_score(model, X, Y_distance, cv=tscv, scoring='r2')
    results.append(cv_results)
    names.append(name)
    print(f'{name}: {cv_results.mean():f} ({cv_results.std():f})')

# Compare Algorithms
plt.boxplot(results)
# Name the boxes through xticks (boxes sit at positions 1..N by default); the
# boxplot `labels` keyword is deprecated in recent Matplotlib releases.
plt.xticks(range(1, len(names) + 1), names)
plt.title('Algorithm Comparison')
plt.show()

In [None]:
# Peek at the single feature row at index 166 (kept 2-D by slicing).
X[166:167]

## Linear Regression Trial

In [None]:
# Train a regression model on the first rows and spot-check it on the rows after them.
from sklearn.linear_model import LinearRegression

# Rows [0, 160) are used for fitting, rows [160, 167) for the spot check.
train_end, test_end = 160, 167

# fit final model
model = LinearRegression()
model.fit(X[:train_end], Y[:train_end])
# make a prediction
Xnew = X[train_end:test_end]
ynew = model.predict(Xnew)
# Show the inputs, the predictions and the matching targets. The model is fitted
# on both columns of Y (Distance, rideType), so each prediction is compared with
# the corresponding row of Y rather than with the ride type alone.
for features, predicted, actual in zip(Xnew, ynew, Y[train_end:test_end]):
    print(f"X={features}, Predicted={predicted}, Actual={actual}")

In [None]:
# Predict for one hand-written sample; the values follow the feature column
# order month, day, hour, minute, dayOfWeek, isWeekend.
result = model.predict([[3,27,8,36,6,1]])
result

## Logistic Regression for Ride Type

In [None]:
# Fit a classifier for the encoded ride type and predict one hand-written sample
# (feature order: month, day, hour, minute, dayOfWeek, isWeekend).
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(random_state=0)
clf.fit(X, Y_rideType)
result_ridetype = clf.predict([[12,27,8,36,6,1]])
print(f"Result type prediction={result_ridetype}")

## Weather Data Export

In [None]:
import requests
# Fetch one day of historical weather for Istanbul from World Weather Online.
API_KEY = "YOUR_API_KEY"
# Use HTTPS so the key is not sent in clear text, and hand the query to
# `params` so requests takes care of URL encoding.
response = requests.get(
    "https://api.worldweatheronline.com/premium/v1/past-weather.ashx",
    params={
        "key": API_KEY,
        "q": "Istanbul",
        "format": "json",
        "date": "2020-12-25",
    },
    timeout=30,  # without a timeout a stalled connection would hang the cell
)
# Fail loudly on an HTTP error instead of trying to decode an error page as JSON.
response.raise_for_status()
api_results = response.json()
print(api_results)

In [None]:
# Hourly records for the first day in the response.
api_results['data']['weather'][0]['hourly']