# Bikeshare 

In [72]:
import pandas as pd
import numpy as np
from tabulate import tabulate
import inspect
import seaborn as sns 
import matplotlib.pyplot as plt

In [73]:
day = pd.read_csv('Bike-Sharing-Dataset/day.csv') #index_coulmns 0
hour = pd.read_csv('Bike-Sharing-Dataset/hour.csv')

### Functions might be used:

In [74]:
def retrieve_name(var):
        """
        Gets the name of var. Does it from the out most frame inner-wards.
        :param var: variable to get name from.
        :return: string
        """
        for fi in reversed(inspect.stack()):
            names = [var_name for var_name, var_val in fi.frame.f_locals.items() if var_val is var]
            if len(names) > 0:
                return names[0]

In [75]:
def missing(df):

    total = df.isnull().sum().sort_values(ascending=False)
    percent_1 = df.isnull().sum()/df.isnull().count()*100
    percent_2 = (round(percent_1, 1)).sort_values(ascending=False)
    missing_data = pd.concat([total, percent_2], axis=1, keys=['Total', '%'])
    return missing_data.head(5)

In [76]:
def iter_columns(df):
    for col_name in df.columns: 
        print(col_name)
        

In [77]:
def get_cname(df,col):
    return df.columns[df.columns.get_loc(col)]  # <--- Is ther a nicer way to do this?



In [91]:
def explore(*args):
    for arg in args:
        print('Data exploration for',retrieve_name(arg),':','\n')
        print(arg.info(),'\n')
        #print('Describe the data:\n',arg.describe().round(2),'\n')
        for col in arg:
            if len(arg[col].unique()) < 50:
                print('List unique values of column',print(col),':\n',arg[col].unique()) #get_cname(arg,col)
        print('\n','Missing Values:\n',missing(arg),'\n\n\n') 
        print('Column names for',retrieve_name(arg),':\n')
        for col_name in arg.columns: 
            print(col_name, end='   ')
        print('\n')

### Targets

- Improve availability of bikes for working professionals / salaried employees as they are the most valued customers.
- Improve availability of bikes for casual customers.
- Provide statistics about contribution of weather in bike demands.
- Provide statistics about how traffic and pollution affect sales.



### Questions

- Is there a correlation between temp and feeled temp?
- Is there a correlation between temp, atemp and the toal bike rentals?
- Is there a correlation between season and feeled temp / temp?

## Data Exploration

In [79]:
explore(day, hour)

Data exploration for day : 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 731 entries, 0 to 730
Data columns (total 16 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   instant     731 non-null    int64  
 1   dteday      731 non-null    object 
 2   season      731 non-null    int64  
 3   yr          731 non-null    int64  
 4   mnth        731 non-null    int64  
 5   holiday     731 non-null    int64  
 6   weekday     731 non-null    int64  
 7   workingday  731 non-null    int64  
 8   weathersit  731 non-null    int64  
 9   temp        731 non-null    float64
 10  atemp       731 non-null    float64
 11  hum         731 non-null    float64
 12  windspeed   731 non-null    float64
 13  casual      731 non-null    int64  
 14  registered  731 non-null    int64  
 15  cnt         731 non-null    int64  
dtypes: float64(4), int64(11), object(1)
memory usage: 91.5+ KB
None 

List unique values of column season :
 [1 2 3 4]
List uni

## Preprocessing

### Categorize Data

Both hour.csv and day.csv have the following fields, except hr which is not available in day.csv


#### Original columns:

- instant: record index
- dteday : date
- season : season (1:winter, 2:spring, 3:summer, 4:fall)
- yr : year (0: 2011, 1:2012)
- mnth : month ( 1 to 12)
- hr : hour (0 to 23)
- holiday : weather day is holiday or not (extracted from [Web Link])
- weekday : day of the week
- workingday : if day is neither weekend nor holiday is 1, otherwise is 0.
 weathersit:
  - 1: Clear, Few clouds, Partly cloudy, Partly cloudy
  - 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
  - 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds
  - 4: Heavy Rain + Ice Pallets + Thunderstorm + Mist, Snow + Fog
- temp : Normalized temperature in Celsius. The values are derived via (t-t_min)/(t_max-t_min), t_min=-8, t_max=+39 (only in hourly scale)
- atemp: Normalized feeling temperature in Celsius. The values are derived via (t-t_min)/(t_max-t_min), t_min=-16, t_max=+50 (only in hourly scale)
- hum: Normalized humidity. The values are divided to 100 (max)
- windspeed: Normalized wind speed. The values are divided to 67 (max)
- casual: count of casual users
- registered: count of registered users
- cnt: count of total rental bikes including both casual and registered

#### New columns:

- day: add hours
- raw_tem
- raw_atemp
- raw_hum

### Data Transformatin / Categorization 

In [80]:
# Calculating the raw weather values
def raw_temp(temp):
    """
    Reverse normalized temperature (temp):
    tMin = -8
    tMax =  +39
    normTemp = (tempRaw-tMin)/(tMax-tMin)
    tempRaw = tMax*normTemp-tMin*normTemp+tMin
    returns tempRaw
    """    
    return 39*temp-(-8)*temp+(-8)

def raw_atemp(atemp):
    """
    Reverse normaliued feeling temperature (atemp):
    tMin = -16
    tMax =  +50
    normTemp = (atempRaw-tMin)/(tMax-tMin)
    atempRaw = tMax*normTemp-tMin*normTemp+tMin
    returns atempRaw
    """
    return 50*atemp-(-8)*atemp+(-8)

def raw_hum(hum):
    return hum*100

# Converting temperature in categories

def categorize_temp(t):
    if t <= -15:
        return 'Death'
    elif t > -15 and t <= -10:
        return 'Frigid'
    elif t > -10 and t <= -3:
        return 'Freezing'
    elif t > -3 and t <= 3:
        return 'Very cold'
    elif t > 3 and t <= 12:
        return 'Cold'
    elif t > 12 and t <= 17:
        return 'Cool'
    elif t > 17 and t <= 25:
        return 'Mild'
    elif t > 25 and t <= 33:
        return 'Warm'
    elif t > 33 and t <= 40:
        return 'Hot'
    elif t > 40 and t <= 45:
        return 'Very hot'
    else:
        return 'Extremely hot'

# Test function
#for i in range(-20, 50):
#    print(i,'=',categorize_temp(i))


In [81]:
# Categorizing numeric values and further transformation. 

data = [day,hour] # <-- Ask about how data is saved to a variable. 

season_map = {1:'Winter', 2:'Spring', 3:'Summer', 4:'Fall'}
holiday_map = {0:'No', 1:'Yes'}
yr_map = {0:'2011', 1:'2012'}
workingday_map = {0:'No', 1:'Yes'}
weekday_map = {6:'Saturday', 0:'Sunday', 1:'Monday', 2:'Tuesday', 3:'Wednesday', 4:'Thursday', 5:'Friday'}
mnth_map = {1:'January', 2:'February', 3:'March', 4:'April', 5:'May', 6:'June', 7:'July', 8:'August', 9:'September', 10:'October', 11:'November', 12:'December'}
weathersit_map = {1:'nice', 2:'cloudy', 3:'wet', 4:'lousy'}

# All mapings in this for loop will be applied to day and hour. 
for df in data:
    df['season'] = df['season'].map(season_map)
    df['holiday'] = df['holiday'].map(holiday_map)
    df['yr'] = df['yr'].map(yr_map)
    df['workingday'] = df['workingday'].map(workingday_map)
    df['weekday'] = df['weekday'].map(weekday_map)
    df['mnth'] = df['mnth'].map(mnth_map)
    df['weathersit'] = df['weathersit'].map(weathersit_map)
    df['dteday'] = pd.to_datetime(df['dteday'], format='%Y-%m-%d') # <-- How to change the datetime format to '%d.%m.%Y' | This was chaning the dtype --> .dt.strftime('%m/%d/%Y')
    df['raw_temp'] = raw_temp(df['temp'])
    df['raw_atemp'] = raw_atemp(df['atemp'])
    df['raw_hum'] = raw_hum(df['hum'])


In [85]:
for df in data:
    df['cat_temp'] = categorize_temp(df['raw_temp'])
    df['cat_atemp'] = categorize_temp(df['raw_atemp'])

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
print(hour.corr())
sns.heatmap(hour.corr())

In [None]:
hour.head()


In [None]:
day.head()

In [None]:
day.info()

In [None]:
hour.info()