## Extraction

In [315]:
%matplotlib inline

# Write column names

In [316]:
def listToString(s):
    """Join the strings in ``s`` into a single newline-separated string.

    Parameters
    ----------
    s : iterable of str
        Items to join; each one ends up on its own line.

    Returns
    -------
    str
        The items separated by ``"\\n"`` (an empty string when ``s`` is empty).
    """
    # The separator is a newline, not an empty string.
    return "\n".join(s)

# Libraries Import

In [317]:
import numpy as np
import pandas as pd
from datetime import datetime

# For import export of model
import pickle
import json
from sklearn.linear_model import LinearRegression


## For Review

In [318]:
# NOTE(review): the input path below is hard-coded; the `sys.argv[1]` remnant
# suggests a command-line argument was considered as the source — TODO confirm.
# sys.argv[1]

# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# this file must come from a trusted source.
# The unpickled object is unpacked into exactly three values.
with open('longevity_template.pickle', 'rb') as f:
    fragrance, profile, weather = pickle.load(f)

In [319]:
# Earlier approach, kept for reference:
# fragrance_df = pd.read_json(fragrance)

# Each payload is evaluated and wrapped as a one-row frame with index label 0.
# NOTE(security): eval() executes whatever expression the payload contains.
# These payloads come straight from the pickle file, so they must be trusted;
# a literal-only parser (json.loads / ast.literal_eval) would be safer if the
# payload format allows it — TODO confirm the format.
fragrance_df    =  pd.DataFrame(data=eval(fragrance), index=[0])
profile_df      =  pd.DataFrame(data=eval(profile),   index=[0])
weather_df      =  pd.DataFrame(data=eval(weather),   index=[0])

In [320]:
# fragrance_df
# profile_df
# weather_df

#### Fixing Weather Keys

In [321]:
def fix_weather_keys(df):
    """Append ``_avg`` to every column name except the last two.

    The last two columns are kept verbatim (in this notebook's weather frame
    they are ``weather_main`` and ``weather_desc``).

    The rename is idempotent: a name that already ends in ``_avg`` is left
    untouched, so re-running the calling cell does not produce names such as
    ``temp_avg_avg``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are renamed **in place**.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenient reassignment.
    """
    suffix = '_avg'
    columns = list(df.columns)
    # Slicing copes with frames that have fewer than three columns:
    # `head` is then empty and every column is kept as is.
    head, tail = columns[:-2], columns[-2:]
    renamed = [
        # str()/f-string also tolerate non-string labels.
        name if str(name).endswith(suffix) else f'{name}{suffix}'
        for name in head
    ]
    df.columns = renamed + tail
    return df

In [322]:
# Rename the weather columns via fix_weather_keys and rebind the result.
weather_df = fix_weather_keys(weather_df)
# Bare expression: shows the frame as this cell's output.
weather_df

Unnamed: 0,temp_avg,hum_avg,dew_point_avg,uv_index_avg,temp_feels_like_avg,atm_pressure_avg,clouds_avg,visibility_avg,wind_speed_avg,rain_avg,snow_avg,weather_main,weather_desc
0,82.2525,75.25,73.88,6.69625,88.15,1013.5,66.125,60000,15.71125,2.86375,0,Rain,light rain


In [323]:
# Place the fragrance, profile and weather frames side by side (column-wise).
single_df = pd.concat(
    [fragrance_df, profile_df, weather_df],
    axis='columns',
)

In [324]:
# single_df.head()

## Cleaning

In [325]:
# Keep every column except 'weather_desc'; sort=False leaves the remaining
# columns in their existing order.
kept_columns = single_df.columns.difference(['weather_desc'], sort=False)
single_df = single_df[kept_columns]

In [326]:
# Missing Values
# df['rain_avg'].fillna(int(0), inplace=True)
# df['snow_avg'].fillna(int(0), inplace=True)

In [327]:
# Fill in the usage fields with fixed values: 7 sprays, an indoor time
# percentage of 75, and today's date stamped at 12:00:00.
single_df = single_df.assign(
    number_of_sprays=7,
    indoor_time_percentage=75,
    apply_time=datetime.now().strftime('%Y-%m-%d 12:00:00'),
)

In [328]:
# single_df.dtypes

In [329]:
# Fragrance/brand descriptors and weather measurements removed from the
# single-row frame.
columns_to_drop = [
    'fragrance', 'fragrance_gender', 'fragrance_type', 'brand', 'brand_tier',
    'uv_index_avg', 'visibility_avg', 'atm_pressure_avg', 'clouds_avg',
    'temp_feels_like_avg', 'wind_speed_avg', 'rain_avg', 'snow_avg',
]

# Raises KeyError if any listed column is missing, as the in-place drop did.
single_df = single_df.drop(columns=columns_to_drop)

# single_df.head()

## Review Data

In [351]:
# NOTE(security): pickle.load can execute arbitrary code while unpickling, so
# this file must come from a trusted source.
with open('longevity_direct_template.pickle', 'rb') as f:
    reviews = pickle.load(f)
# Bare expression: shows the loaded object as this cell's output.
reviews

['[{"longevity":20,"apply_time":"2021-02-12 10:58:00","indoor_time_percentage":54,"number_of_sprays":11,"projection":1.2,"sillage":22.5,"like":53,"temp_avg":81.43,"hum_avg":70.38,"dew_point_avg":73.01,"uv_index_avg":14,"temp_feels_like_avg":83.08,"atm_pressure_avg":1010.75,"clouds_avg":44.62,"visibility_avg":60000,"wind_speed_avg":13.31,"rain_avg":0.4,"snow_avg":0,"weather_main":"Rain","fp_id":1,"fp_country":"United States of America","gender":"Male","dob":"2019-11-12","profession":"Cafe Worker","skin_type":"Very Oily","sweat":82,"height":54,"weight":54,"climate":"Temperate (Warm and Mild Winter)","season":"Spring"},{"longevity":50,"apply_time":"2021-02-12 10:58:00","indoor_time_percentage":54,"number_of_sprays":11,"projection":2,"sillage":22.5,"like":53,"temp_avg":82.43,"hum_avg":75.38,"dew_point_avg":73.02,"uv_index_avg":14.1,"temp_feels_like_avg":83.08,"atm_pressure_avg":1010.75,"clouds_avg":44.62,"visibility_avg":60000,"wind_speed_avg":13.31,"rain_avg":0.4,"snow_avg":0,"weather_mai

In [346]:
# Parse the JSON text held in the first element of `reviews` and build one
# row per parsed record.
# NOTE(review): only reviews[0] is used; any further elements are ignored —
# TODO confirm that is intended.
review_records = json.loads(reviews[0])
reviews_df = pd.DataFrame(review_records)
reviews_df

Unnamed: 0,longevity,apply_time,indoor_time_percentage,number_of_sprays,projection,sillage,like,temp_avg,hum_avg,dew_point_avg,...,fp_country,gender,dob,profession,skin_type,sweat,height,weight,climate,season
0,20,2021-02-12 10:58:00,54,11,1.2,22.5,53,81.43,70.38,73.01,...,United States of America,Male,2019-11-12,Cafe Worker,Very Oily,82,54,54,Temperate (Warm and Mild Winter),Spring
1,50,2021-02-12 10:58:00,54,11,2.0,22.5,53,82.43,75.38,73.02,...,United States of America,Male,2019-11-12,Cafe Worker,Very Oily,82,54,54,Temperate (Warm and Mild Winter),Spring


In [332]:
# Remove weather measurements and review fields from the reviews frame.
review_columns_to_drop = [
    'uv_index_avg', 'visibility_avg', 'atm_pressure_avg', 'clouds_avg',
    'temp_feels_like_avg', 'wind_speed_avg', 'rain_avg', 'snow_avg',
    'projection', 'sillage', 'climate', 'like',
]
reviews_df = reviews_df.drop(columns=review_columns_to_drop)

# reviews_df.columns

In [333]:
# set(single_df.columns) ^ set(reviews_df.columns)

## Combining DataFrames

In [334]:
# Stack the review rows on top of the single row to predict, which therefore
# ends up as the last row.
# ignore_index=True renumbers the rows 0..n-1. Without it the single row keeps
# its label 0 and collides with the first review row, leaving duplicate index
# labels that make any label-based lookup ambiguous.
df = pd.concat([reviews_df, single_df], ignore_index=True)
# df

## Operations on the Combined DataFrame

In [335]:
# df = df.convert_dtypes()

# Datetime: cast both date columns to datetime64[ns] in a single call.
df = df.astype({
    'dob': 'datetime64[ns]',
    'apply_time': 'datetime64[ns]',
})

# df.dtypes

In [336]:
# Calculating age

# Age in fractional years; 365.25 days per year averages over leap years.
# pd.Timestamp.now() (naive local time) replaces pd.to_datetime('now'), whose
# UTC-vs-local meaning has differed between pandas versions. It also matches
# the local-time datetime.now() used when `apply_time` is filled in.
now = pd.Timestamp.now()
seconds_per_year = 60*60*24*365.25
df['age'] = (now - df['dob']).dt.total_seconds() / seconds_per_year
# `dob` is no longer needed once the age has been derived from it.
df = df.drop(columns=['dob'])

In [337]:
# Sorting out dates

# Split `apply_time` into month / day / hour features (year, minute and
# weekday name are deliberately not derived), make sure `age` is a float
# column, then remove the raw `apply_time` column.
df = (
    df.assign(
        apply_time_month=df['apply_time'].dt.month,
        apply_time_day=df['apply_time'].dt.day,
        apply_time_hour=df['apply_time'].dt.hour,
        age=df['age'].astype('float'),
    )
    .drop(columns=['apply_time'])
)

In [338]:
# Capture the names of the object-dtype columns from the frame as it is
# before convert_dtypes() runs on the next line.
categorical_columns = df.select_dtypes(include=['object']).columns.values
# NOTE(review): convert_dtypes() presumably moves these object columns to other
# dtypes, so the selection above would differ if made afterwards — TODO confirm.
df = df.convert_dtypes()
# categorical_columns

In [339]:
# One-hot encode the categorical columns. get_dummies already prefixes each
# indicator with its source column name and joins with '_' by default, so
# those arguments need not be spelled out.
df = pd.get_dummies(df, columns=categorical_columns)
# df

In [340]:
# Target is the `longevity` column; every other column is a feature.
target_column = 'longevity'
X, Y = df.drop(columns=[target_column]), df[target_column]
# Y

In [341]:
# Remove constant columns: a column is kept only if at least one row differs
# from the first row's value in that column.
# NOTE(review): missing values in the first row may affect this comparison —
# TODO confirm there are none at this point.
first_row = X.iloc[0]
differs_from_first = X.ne(first_row)
X = X.loc[:, differs_from_first.any()]
# X

In [342]:
# The final row is the sample to predict; keep it as a one-row frame.
X_bar = X.iloc[-1:]

# Remove that row from the training features and target.
X, Y = X.iloc[:-1], Y.iloc[:-1]

# X_bar

In [343]:
# Fit an ordinary least-squares model on the training rows.
model = LinearRegression()
model.fit(X, Y)

In [344]:
# Predict for the held-out row and print the single resulting value.
predicted_longevity = model.predict(X_bar)[0]
print(predicted_longevity)

49.055119018772984


# END