# Predict Workout

Predict Workout Day and distance

In [None]:
#!pip install scikit-learn

In [None]:
import pandas as pd
from datetime import datetime, timedelta

filepath = "data/"

def loadCleanData():
    """Read the activities CSV and return it with its date columns parsed.

    ``Week`` is converted to datetimes. ``Activity Date`` is converted to
    datetimes and then shifted forward by three hours.
    """
    frame = pd.read_csv(f"{filepath}activities_corolated.csv")
    frame['Week'] = pd.to_datetime(frame['Week'])
    parsed_dates = pd.to_datetime(frame['Activity Date'])
    frame['Activity Date'] = parsed_dates + timedelta(hours=3)
    return frame

data = loadCleanData()
data.head()

### Feature Extraction

In [None]:
# Since the data is limited, these features don't make sense
#data['year'] = list(map(lambda t : t.year, data['Activity Date']))
#data['month'] = list(map(lambda t : t.month, data['Activity Date']))
#data['minute'] = list(map(lambda t : t.minute, data['Activity Date']))

In [None]:
# Day of the month of each activity
data['day'] = [stamp.day for stamp in data['Activity Date']]

In [None]:
# Hour of the day at which each activity started
data['hour'] = [stamp.hour for stamp in data['Activity Date']]

In [None]:
# Day of the week of each activity (pandas convention: Monday=0 ... Sunday=6)
data['dayOfWeek'] = [stamp.dayofweek for stamp in data['Activity Date']]

In [None]:
# 1 when the activity falls on Saturday (5) or Sunday (6), otherwise 0
activity_weekdays = pd.DatetimeIndex(data['Activity Date']).dayofweek
data['isWeekend'] = (activity_weekdays >= 5).astype(int)

#### Weather Data Export

In [None]:
import requests
from myconfig import *

# api_key is defined in myconfig file
city = "Istanbul" 
date = "2020-12-25"
hour = 8

def get_weather(api_key,city,date,hour):
    """Fetch temperature, wind speed and weather description for a date and hour.

    Args:
        api_key: API key, sent as the ``key`` query parameter.
        city: Location, sent as the ``q`` query parameter.
        date: ISO-format date string or a ``datetime``.
        hour: Hour of the day; selects the 3-hour slice of the hourly data.

    Returns:
        Tuple ``(temp_c, windspeed_miles, weather_desc)`` where the first two
        values are ints and the last is the description string.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        KeyError, IndexError: If the response lacks the expected structure.
    """
    if isinstance(date, str):
        date = datetime.fromisoformat(date)

    # Use different endpoints for past weather data and for future forecasts
    if date > datetime.now():
        resource = "weather.ashx"
    else:
        resource = "past-weather.ashx"
    # NOTE(review): the API key travels over plain HTTP; switch to https if
    # the service supports it.
    endpoint = f"http://api.worldweatheronline.com/premium/v1/{resource}"

    query = {
        "key": api_key,
        "q": city,
        # Send the calendar date only; interpolating the datetime itself
        # would put "YYYY-MM-DD 00:00:00" (with a space) into the URL.
        "date": f"{date:%Y-%m-%d}",
        "format": "json",
    }

    # A timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get(endpoint, params=query, timeout=30)
    response.raise_for_status()
    api_results = response.json()

    # Weather data is split into 3-hour slices; find the slice for this hour
    hour_slice = int(hour) // 3

    # Select the weather entry for that slice on the first returned day
    slot = api_results['data']['weather'][0]['hourly'][hour_slice]

    temp_c = slot['tempC']
    windspeed_miles = slot['windspeedMiles']
    weather_desc = slot['weatherDesc'][0]['value']

    return int(temp_c), int(windspeed_miles), weather_desc

temp_data = get_weather(api_key,city,date,hour)
print(temp_data)

In [None]:
date = "2021-03-05"
hour = 8
temp_data = get_weather(api_key,city,date,hour)
print(temp_data)

#### Get Weather Data and Save 

In [None]:
## Iterate over all ride dates to fetch the weather for each ride
#for index, row in data.iterrows():
#    temp, wind, weatherDesc = get_weather(api_key,city,row['Activity Date'],row['hour'])
#    data['temp'][index] = temp
#    data['wind'][index] = wind 
#    data['weatherDesc'][index] = weatherDesc
#    print(row['Activity Date'],temp, wind, weatherDesc)

In [None]:
## Export correlated data
#data.to_csv('data/activities_corolated.csv', index=False)
#print('Successfully saved!')

### Transform Data

In [None]:
# Encode the activity type strings as integer labels for the models
from sklearn import preprocessing

le_activity = preprocessing.LabelEncoder().fit(data['Activity Type'])
le_activity.classes_

In [None]:
vals = le_activity.transform(le_activity.classes_)
vals

In [None]:
data['rideType'] = le_activity.transform(data['Activity Type'])
data['rideType']

In [None]:
# Encode the weather description strings as integer labels for the models
le = preprocessing.LabelEncoder().fit(data['weatherDesc'])
le.classes_

In [None]:
weather = le.transform(le.classes_)
weather

In [None]:
data['weather'] = le.transform(data['weatherDesc'])
data['weather']

### Prepare features

In [None]:
# Feature matrix: one row per activity, columns in the order listed here
feature_columns = ['hour','dayOfWeek','isWeekend','temp','wind','weather']
X = data[feature_columns].to_numpy()
X

### Prepare Predict

In [None]:
# Regression target: ride distance as a NumPy array
Y_distance = data['Distance'].to_numpy()
Y_distance

In [None]:
# Classification target: encoded ride type as a NumPy array
Y_rideType = data['rideType'].to_numpy()
Y_rideType

In [None]:
# Both targets side by side: distance and encoded ride type
Y = data[['Distance','rideType']].to_numpy()
Y

## Feature Evaluation

In [None]:
# Ride distance against the hour of the day
data.plot.scatter(x='hour', y='Distance', xticks=data['hour'], figsize=(14,8))

In [None]:
# Ride distance against the day of the week
data.plot.scatter(x='dayOfWeek', y='Distance', xticks=data['dayOfWeek'], figsize=(14,8))

In [None]:
# Ride distance for weekday (0) versus weekend (1) rides
data.plot.scatter(x='isWeekend', y='Distance', xticks=data['isWeekend'], figsize=(14,8))

## Training Score

In [None]:
# Fit a linear regression for ride distance on the leading rows and compare
# its predictions with the actual values on the rows that follow.
from sklearn.linear_model import LinearRegression

train_end = 160  # rows [0, train_end) are used for fitting
test_end = 167   # rows [train_end, test_end) are used for the comparison

# fit final model
model = LinearRegression()
model.fit(X[:train_end], Y_distance[:train_end])

# make a prediction
Xnew = X[train_end:test_end]
ynew = model.predict(Xnew)

# show the inputs and predicted outputs next to the actual targets;
# zip stops at the end of Xnew, so the target slices need no upper bound
for features, predicted, actual_distance, actual_ride_type in zip(
        Xnew, ynew, Y_distance[train_end:], Y_rideType[train_end:]):
    print("X=%s, Predicted=%s, Actual Distance=%s, Actual Ride Type=%s" % (features, predicted, actual_distance, actual_ride_type))

In [None]:
result = model.predict([[8,6,1,9,15,0]])
result

## LogisticRegression for RideType

In [None]:
from sklearn.linear_model import LogisticRegression

# Fit the ride-type classifier on every available row
clf = LogisticRegression(random_state=0)
clf.fit(X, Y_rideType)

# Try the classifier on two hand-written feature rows
for sample in ([8,6,1,20,3,0], [8,6,1,10,12,1]):
    result_ridetype = clf.predict([sample])
    print("Result type prediction=%s" % result_ridetype)

In [None]:
# Ride type for the weekend.
# The second value of each row is dayOfWeek, which the training data encodes
# with pandas `dayofweek` (Monday=0 ... Saturday=5, Sunday=6); 7 is not a
# valid day in that encoding, so Saturday and Sunday are 5 and 6 here.
result_ridetype = clf.predict([[8,5,1,8,12,3],[9,6,1,20,17,3]])
print("Result type prediction=%s" % result_ridetype)

# Ride distance for the weekend (same dayOfWeek encoding as above)
result = model.predict([[9,5,1,10,12,3],[9,6,1,20,17,3]])
print(result)

In [None]:
import pickle

# Persist both fitted estimators to files in the model folder
distance_model_file = "model/distance_model.pkl"
ridetype_model_file = "model/ridetype_model.pkl"

for target_path, estimator in ((distance_model_file, model), (ridetype_model_file, clf)):
    with open(target_path, 'wb') as file:
        pickle.dump(estimator, file)

In [None]:
from datetime import datetime, time
import pickle

def predict_workout(wdate,wtime):
    """Predict ride type and distance for a planned workout.

    Looks up the weather for the given date and hour, builds the same six
    features the models were fitted on (hour, dayOfWeek, isWeekend, temp,
    wind, weather) and runs both pickled models on them.

    Args:
        wdate: Workout date as an ISO-format string, e.g. ``'2021-04-04'``.
        wtime: Workout time as an ISO-format string, e.g. ``'20:00:00'``.

    Returns:
        The distance model's prediction array for the single feature row.
        The ride-type prediction is printed, not returned.
    """
    wtime = time.fromisoformat(wtime)
    workout_temp, workout_wind, workout_weatherdesc = get_weather(api_key,city,wdate,wtime.hour)

    # Encode the weather description; a description the encoder was not
    # fitted on raises ValueError, in which case fall back to label 0.
    try:
        workout_weather = int(le.transform([workout_weatherdesc])[0])
    except ValueError:
        workout_weather = 0

    wdate = datetime.fromisoformat(wdate)
    workout_hour = wtime.hour
    # weekday() is Monday=0 ... Sunday=6, matching the pandas `dayofweek`
    # values used for the training feature; isoweekday() would be off by one.
    workout_dayofweek = wdate.weekday()
    workout_isweekend = int(workout_dayofweek >= 5)

    # Single feature row, in the column order used for training
    features = [[workout_hour,workout_dayofweek,workout_isweekend,workout_temp,workout_wind,workout_weather]]

    # NOTE: pickle.load executes arbitrary code from the file; only load
    # model files that come from a trusted source.
    # Load ride type model from file
    ridetype_model_file = "model/ridetype_model.pkl"
    with open(ridetype_model_file, 'rb') as file:
        ridetype_model = pickle.load(file)

    result_ridetype = ridetype_model.predict(features)
    print("Result type prediction=%s" % result_ridetype)

    # Load distance model from file
    distance_model_file = "model/distance_model.pkl"
    with open(distance_model_file, 'rb') as file:
        distance_model = pickle.load(file)

    return distance_model.predict(features)

workout_date = '2021-04-04'
workout_time = '20:00:00'
predict_workout(workout_date,workout_time)