In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import r2_score, mean_absolute_error
import math
import pytz

from scipy.stats import norm
from datetime import timedelta, datetime

from warnings import filterwarnings
filterwarnings('ignore')

In [None]:
def Load_management_system(df, total_range, date=None, last_date=None, time=None, plot=False):

  def date_changer(date):
    '''
      Turn a 'DD/MM/YYYY' string into a datetime at midnight.

      date: 'DD/MM/YYYY' string, an existing datetime (returned unchanged),
            or None to mean today.
      Returns a datetime. Today's date is truncated to midnight so that
      subtracting it from / with a parsed date (always at midnight) counts
      whole calendar days instead of losing a day to the time of day.
      Raises ValueError when the text is not three '/'-separated integers
      that form a valid calendar date.
    '''
    if date is None:
      return datetime.today().replace(hour=0, minute=0, second=0, microsecond=0)
    if isinstance(date, datetime):
      return date

    parts = str(date).split('/')
    if len(parts) != 3:
      raise ValueError(f"date must be in 'DD/MM/YYYY' format, got {date!r}")
    day, month, year = (int(part) for part in parts)
    return datetime(year, month, day)

  def Data_Preprocessing(df):
    '''
      Build the regression table: one row per full-charge date.

      Walking day by day from the first to the last 'Date' in df, the
      weekday of every day is counted. Whenever the day is a date whose
      'Full Charge' flag is 1, the counts gathered since the previous such
      date become one row and the counters restart. 'km_driven' for a row is
      'Range Added' summed over the df rows since the previous full-charge
      row, up to and including the current one. The first row is dropped
      before returning.

      df: frame with 'Date', 'Full Charge' and 'Range Added' columns.
      Returns a DataFrame with columns Sun..Sat, 'Date' and 'km_driven'.

      NOTE(review): assumes at most one full-charge row per calendar date and
      that every 'Date' is a whole number of days after the first one;
      otherwise the 'Date' / 'km_driven' columns will not line up - confirm
      against the data source.
    '''
    day_names = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']

    # Positional access so a filtered / re-indexed frame works too.
    first_date = df['Date'].iloc[0]
    last_date = df['Date'].iloc[-1]
    span_days = (last_date - first_date).days

    # Built once; the set gives O(1) membership tests inside the day loop.
    full_charge_dates = set(df.loc[df['Full Charge'] == 1, 'Date'])

    # Rows are collected in a list and turned into a frame in one go:
    # DataFrame.append no longer exists in pandas 2.x and was quadratic anyway.
    records = []
    dates = []
    counts = dict.fromkeys(day_names, 0)
    for offset in range(span_days + 1):
      day = first_date + timedelta(offset)
      counts[day.strftime('%a')] += 1
      if day in full_charge_dates:
        dates.append(day)
        records.append(counts)
        counts = dict.fromkeys(day_names, 0)

    ldf = pd.DataFrame(records, columns=day_names)
    ldf['Date'] = dates

    # Distance accumulated between consecutive full charges.
    km_driven = []
    running_km = 0
    for added, full in zip(df['Range Added'].values, df['Full Charge'].values):
      running_km += added
      if full == 1:
        km_driven.append(running_km)
        running_km = 0

    ldf['km_driven'] = km_driven
    # The first interval is discarded, as in the original implementation.
    return ldf[1:]
  
  def model_training(dff):
    '''
      Fit a Ridge regression (alpha=0.2) that predicts 'km_driven' from
      every column of dff other than 'Date' and 'km_driven'.

      Returns the fitted model.
    '''
    target = dff['km_driven']
    features = dff.drop(['Date', 'km_driven'], axis=1)

    ridge = Ridge(alpha=0.2)
    ridge.fit(features, target)
    return ridge

  def Linear_Transformation(old_date, new_date):
    '''
      Count how often each weekday occurs after old_date up to and
      including new_date, printing both dates along the way.

      Both arguments are handed to date_changer before use.
      Returns the counts as a list ordered Sun, Mon, Tue, Wed, Thu, Fri, Sat.
    '''
    start = date_changer(old_date)
    print(f'Last plugged in date: {start.strftime("%d-%m-%Y")}, Weekday: {start.strftime("%A")}')

    end = date_changer(new_date)
    elapsed = end - start

    print(f'\nEstimation on Date: {end.strftime("%d-%m-%Y")}, Weekday: {end.strftime("%A")}')

    day_order = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
    tally = dict.fromkeys(day_order, 0)
    # Offsets start at 1: the day of the last plug-in itself is not counted.
    for offset in range(1, elapsed.days + 1):
      tally[(start + timedelta(offset)).strftime('%a')] += 1

    return [tally[name] for name in day_order]

  def model_evalution(model, y_test):
    '''
      Ask the model for a prediction on the single sample y_test and
      return that one predicted value.
    '''
    predictions = model.predict([y_test])
    return predictions[0]

  def Probabilistic_distribution_of_distance(df, prediction, total_range):
    '''
      Estimate the day-wise plug-in probability from the predicted distance.

      A normal distribution is fitted to total_range - 'Range Added' over
      all rows of df (stored in a new 'Range left' column on df itself).
      The predicted distance is clamped to [0, total_range]; the range left
      after driving it is then scored:
        * above the largest 'Range left' in df: a linear ramp from 0 % (no
          distance driven) to 5 %;
        * otherwise: 5 % plus 95 % of the fitted probability of a 'Range
          left' value greater than the estimate.

      df: frame with a 'Range Added' column; gains a 'Range left' column.
      prediction: predicted distance driven.
      total_range: full range of the vehicle, in the same unit.
      Returns (probability in percent, fitted scipy normal distribution).
    '''
    # Side effect kept on purpose: the column is added to the caller's frame.
    df['Range left'] = total_range - df['Range Added']
    data = df['Range left'].values

    dist = norm(np.mean(data), np.std(data))
    max_threshold = max(data)

    # A regression can predict a negative distance; without the lower clamp
    # the range left would exceed the capacity and the ramp below would go
    # negative (or divide by zero when max_threshold equals total_range).
    prediction = min(max(prediction, 0), total_range)

    range_left = total_range - prediction
    print(f"\nEstimated Distance driven: {round(prediction, 2)} km")
    print(f'\nEstimated Range left: {round(range_left, 2)} km')

    if range_left > max_threshold:
      # Here total_range >= range_left > max_threshold, so the divisor is > 0.
      prob = ((total_range - range_left) / (total_range - max_threshold)) * 0.05
    else:
      prob = 0.05 + (1 - dist.cdf(range_left)) * 0.95

    prob = prob * 100
    print(f"\nprobability of plugin (Day wise): {round(prob, 2)} %")
    return prob, dist

  def hrs_to_min(x):
    '''
      Convert a ':'-separated clock value (hours first, minutes second)
      into minutes since midnight. Anything after the minutes is ignored.
    '''
    fields = str(x).split(':')
    return int(fields[0]) * 60 + int(fields[1])

  def min_to_hrs(m):
    '''
      Format a minute count as 'H:MM': the hours are not padded, the
      minutes are left-padded with a zero to two characters.
    '''
    hours_text = str(m // 60)
    minutes_text = str(m % 60).zfill(2)
    return hours_text + ':' + minutes_text

  def Probability_of_plugin(df, date, time):
    '''
      Estimate the probability that the plug-in happens after `time`.

      A normal distribution is fitted to the plug-in minute-of-day of the
      rows whose 'DOW' matches the weekday of `date`. When that weekday has
      no rows, a weekend day falls back to the other weekend day; if nothing
      matches, all non-weekend rows are used.

      df: frame with 'Plug In' and 'DOW' columns; gains a 'plugin_min'
          column (side effect on the caller's frame).
      date: 'DD/MM/YYYY' string or None, passed to date_changer.
      time: 'H:MM' string, or None for the current Asia/Kolkata time.
      Returns (fitted scipy normal distribution, probability in percent).
    '''
    df['plugin_min'] = df['Plug In'].apply(hrs_to_min)

    day = date_changer(date).strftime('%a')

    weekend = ['Sat', 'Sun']
    if day in weekend:
      # Own day first, then the other weekend day.
      preferred = [day] + [name for name in weekend if name != day]
    else:
      preferred = [day]

    for name in preferred:
      matches = df['DOW'] == name
      if matches.any():
        weekday_df = df[matches]
        break
    else:
      # Nothing recorded for the preferred day(s): use every working day.
      weekday_df = df[~df['DOW'].isin(weekend)]

    mean = np.mean(weekday_df['plugin_min'])
    # A single sample (or identical samples) has zero spread, and norm() with
    # scale 0 returns NaN from cdf/pdf. One minute is the resolution of
    # 'plugin_min', so it is used as the smallest meaningful spread.
    std = max(np.std(weekday_df['plugin_min']), 1.0)

    time_dist = norm(mean, std)

    if time is None:
      # datetime.utcnow() is deprecated; ask for the zone-aware time directly.
      now = datetime.now(pytz.timezone('Asia/Kolkata'))
      time = f'{now.hour}:{now.minute:02d}'

    plugin_prob = (1 - time_dist.cdf(hrs_to_min(time))) * 100

    print(f'\nPlugin time Probability on {time} : {round(plugin_prob, 3)} %')

    return time_dist, plugin_prob

  def Distribution_plot(dist, total_range):
    '''
      Show two side-by-side curves over range-left values from total_range
      down to 0: the density of dist (left) and 1 - cdf of dist (right).
    '''
    values = list(range(total_range, -1, -1))
    density = [dist.pdf(v) for v in values]
    complement = [(1 - dist.cdf(v)) for v in values]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    panels = (
      (ax1, density, 'Probability distribution of Range left'),
      (ax2, complement, 'Cumulative distribution of Range left'),
    )
    for axis, curve, title in panels:
      axis.plot(values, curve)
      axis.set_title(title, size=15)
      axis.set_xlabel('Range left (km)', size=12)
      axis.set_ylabel('Probability', size=12)

    plt.show()

  def Plugin_distribution_plot(time_dist):
    '''
      Show the density (left) and cumulative distribution (right) of
      time_dist, sampled every 15 minutes from minute 540 up to, but not
      including, minute 1320, with 'H:MM' labels on the x axis.
    '''
    minutes = list(range(540, 1320, 15))

    density = [time_dist.pdf(m) for m in minutes]
    cumulative = [time_dist.cdf(m) for m in minutes]

    clock_labels = [min_to_hrs(m) for m in minutes]

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    panels = (
      (ax1, density, 'Probability Distribution of Plugin time'),
      (ax2, cumulative, 'Cumulative Distribution of plugin time'),
    )
    for axis, curve, title in panels:
      axis.plot(clock_labels, curve)
      axis.set_title(title, size=15)
      axis.set_xlabel('Time', size=12)
      axis.set_ylabel('Probability', size=12)

    fig.autofmt_xdate(bottom=0.2, rotation=90, ha='right', which=None)

    # Hide every other tick label so the rotated labels stay readable.
    for axis in (ax1, ax2):
      for label in axis.get_xaxis().get_ticklabels()[::2]:
        label.set_visible(False)

    plt.show()

  # Derive the columns the helpers rely on: distance gained per charge
  # (energy times a fixed km-per-kWh factor) and the weekday abbreviation.
  km_per_kwh = 6.6
  df['Range Added'] = df['kWh Added'].apply(lambda kwh: kwh * km_per_kwh)
  df['DOW'] = df['Date'].apply(lambda stamp: stamp.strftime('%a'))

  # Fit the distance model on the per-full-charge weekday counts.
  model = model_training(Data_Preprocessing(df))

  # Weekday counts between the last charge and the target date, then the
  # distance the model expects to be driven over those days.
  weekday_counts = Linear_Transformation(last_date, date)
  driven_distance = model_evalution(model, weekday_counts)

  prob, dist = Probabilistic_distribution_of_distance(df, driven_distance, total_range)

  time_dist, plugin_prob = Probability_of_plugin(df, date, time)

  if plot:
    Distribution_plot(dist, total_range)
    Plugin_distribution_plot(time_dist)


In [None]:
# Charging history of the user (date, plug-in/out times, kWh added, ...);
# only the first five rows are kept.
df = pd.read_excel('/content/drive/MyDrive/Blackcoffer/Load_management_system.xlsx')[:5]
total_range = 400
date = '24/07/2021'       # day to estimate for, 'DD/MM/YYYY'; None means today
last_date = "21/07/2021"  # date of the last charge, 'DD/MM/YYYY'
time = '19:00'            # time of day to evaluate; None means the current time
plot = False              # set to True to draw the distribution plots

Load_management_system(df, total_range, date=date, last_date=last_date, time=time, plot=plot)

Last plugged in date: 21-07-2021, Weekday: Wednesday

Estimation on Date: 24-07-2021, Weekday: Saturday

Estimated Distance driven: 254.74 km

Estimated Range left: 145.26 km

probability of plugin (Day wise): 85.44 %
        Date          Plug In         Plug Out  ...  DOW  Range left  plugin_min
0 2021-01-05  17:00:05.904000  07:30:08.256000  ...  Tue       191.0        1020
1 2021-01-08  16:09:33.323000  08:00:58.655000  ...  Fri       170.0         969
2 2021-01-13  18:00:38.794000  07:26:55.397000  ...  Wed       160.0        1080
3 2021-01-18  17:35:24.519000  07:09:54.241000  ...  Mon       153.0        1055
4 2021-01-22  16:57:43.110000  08:12:03.347000  ...  Fri       140.0        1017

[5 rows x 9 columns]

Plugin time Probability on 19:00 : 0.15 %


In [None]:
# Scratch check: np.std on two values (population standard deviation by default).
np.std([179.0, 185])

3.0

In [None]:
# Scratch check: '%a' gives the abbreviated weekday name used for the 'DOW' column.
datetime(2021, 1, 22).strftime('%a')

'Fri'