# Get the Cleaned Data Files

Import Libararies

In [32]:
#Import Libraries
import os
import pandas as pd
import numpy as np
import geopandas as gpd
from opencage.geocoder import OpenCageGeocode
import folium
from folium.plugins import HeatMap
import ipywidgets as widgets
from IPython.display import display
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from IPython.display import clear_output
from IPython.display import HTML
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

Update Directories

In [33]:
#Update the Database with relative paths
current_dir = os.getcwd()
task04_dir = os.path.dirname(current_dir)
database_dir = os.path.join(task04_dir, 'Database')
assests_dir = os.path.join(task04_dir, 'Assests')
cleaned_data_dir = os.path.join(database_dir, 'Cleaned Data')

Get Data File

In [34]:
#Colombo Motor Show Attendance
colombo_motor_show_attendance = pd.read_csv(os.path.join(cleaned_data_dir, 'ColomboMotorShowAttendees.csv'))

colombo_motor_show_attendance

Unnamed: 0,eventName,eventLocation,attndName,attndLocation,attndDate,attndRefHashTag
0,Colombo Motor Show,"BMICH, Colombo",Harshana Kalinga,"Kaduwela, Sri Lanka",11/18/2023,#ColomboMotorShow
1,Colombo Motor Show,"BMICH, Colombo",Sadish Pathirana,"Narammala, Sri Lanka",11/17/2023,#ColomboMotorShow
2,Colombo Motor Show,"BMICH, Colombo",Malith Pramodya Palliyaguruge,"Hakmana, Sri Lanka",11/18/2023,#ColomboMotorShow
3,Colombo Motor Show,"BMICH, Colombo",Shiromi Rathnayake,"Colombo, Sri Lanka",11/17/2023,#ColomboMotorShow
4,Colombo Motor Show,"BMICH, Colombo",Dilshan Subasinghe,"Galle, Sri Lanka",11/18/2023,#ColomboMotorShow
...,...,...,...,...,...,...
94,Colombo Motor Show,"BMICH, Colombo",Kavindu Katuwandeniya,"Mathara, Sri Lanka",11/18/2023,#ColomboMotorShow
95,Colombo Motor Show,"BMICH, Colombo",Nethmi Jayasekara,"Mathara, Sri Lanka",11/18/2023,#ColomboMotorShow
96,Colombo Motor Show,"BMICH, Colombo",Shehan Perera,"Kaduwela, Sri Lanka",11/19/2023,#ColomboMotorShow
97,Colombo Motor Show,"BMICH, Colombo",Jeewantha Ariyasinghe,"Kegalle, Sri Lanka",11/19/2023,#ColomboMotorShow


In [35]:
#count the number of attendees for each attndDate
colombo_motor_show_attendance['attndDate'].value_counts()

attndDate
11/19/2023    40
11/18/2023    26
11/20/2023    26
11/17/2023     7
Name: count, dtype: int64

In [36]:
#Weather Climate DAta
weather_climate_data = pd.read_csv(os.path.join(cleaned_data_dir, 'WeatherClimateData.csv'))

weather_climate_data

Unnamed: 0,eventLocation,attndDate,dhTemp (C),dlTemp (C),precipitation (mm),humidity,wind (km/h)
0,"BMICH, Colombo",11/17/2023,32,26,4.3,0.8684,5.3
1,"BMICH, Colombo",11/18/2023,31,25,6.7,0.8834,6.3
2,"BMICH, Colombo",11/19/2023,31,24,4.1,0.8937,6.9
3,"BMICH, Colombo",11/20/2023,31,24,2.7,0.8805,5.0
4,"BMICH, Colombo",11/21/2023,32,25,5.6,0.8893,6.3


In [37]:
#Set the date format explicitly
date_format = '%m/%d/%Y'

#Convert to datetime
colombo_motor_show_attendance['attndDate'] = pd.to_datetime(colombo_motor_show_attendance['attndDate'], format=date_format)
weather_climate_data['attndDate'] = pd.to_datetime(weather_climate_data['attndDate'], format=date_format)

In [38]:
weather_climate_data

Unnamed: 0,eventLocation,attndDate,dhTemp (C),dlTemp (C),precipitation (mm),humidity,wind (km/h)
0,"BMICH, Colombo",2023-11-17,32,26,4.3,0.8684,5.3
1,"BMICH, Colombo",2023-11-18,31,25,6.7,0.8834,6.3
2,"BMICH, Colombo",2023-11-19,31,24,4.1,0.8937,6.9
3,"BMICH, Colombo",2023-11-20,31,24,2.7,0.8805,5.0
4,"BMICH, Colombo",2023-11-21,32,25,5.6,0.8893,6.3


In [39]:
#Read the Location Data
Locations=pd.read_csv(os.path.join(cleaned_data_dir, 'Locations.csv'))
Locations

Unnamed: 0,location,lat,lng
0,"BMICH, Colombo",6.901107,79.873618
1,"Kaduwela, Sri Lanka",6.935703,79.984331
2,"Narammala, Sri Lanka",7.43073,80.214379
3,"Hakmana, Sri Lanka",6.083926,80.644974
4,"Colombo, Sri Lanka",6.938861,79.854201
5,"Galle, Sri Lanka",6.032814,80.214955
6,"Kurunegala, Sri Lanka",7.487046,80.364908
7,"Wattala, Sri Lanka",6.989871,79.892709
8,"Homagama, Sri Lanka",6.841238,80.003446
9,"Malabe, Sri Lanka",6.904072,79.954619


# Data Proprocessing

In [40]:
#Get BMICH, Colombo as the event location
event_location = Locations[Locations['location'] == 'BMICH, Colombo']

#Get other locations as attendee locations
attendee_locations = Locations[Locations['location'] != 'BMICH, Colombo']

In [41]:
#Get unique attndLocation and attndDate values
attndLocation = np.unique(attendee_locations['location'])
attndDate = np.unique(colombo_motor_show_attendance['attndDate'])

#Create empty arrays for attndDate and attndLocation
attndDate_array = np.array([])
attndLocation_array = np.array([])

#Add attndDate and attndLocation values to arrays
for date in attndDate:
    attndDate_array = np.append(attndDate_array, np.repeat(str(date), len(attndLocation)))
    attndLocation_array = np.append(attndLocation_array, attndLocation)

#Remove "T00:00:00.000000000" in attndDate
attndDate_array = np.char.replace(attndDate_array, 'T00:00:00.000000000', '')

#Create a dataframe with attndDate and attndLocation
AttendeesLocations = pd.DataFrame({'attndDate': attndDate_array, 'attndLocation': attndLocation_array})

#Convert attndDate to datetime for %Y-%m-%d format
AttendeesLocations['attndDate'] = pd.to_datetime(AttendeesLocations['attndDate']).dt.strftime('%Y-%m-%d')

AttendeesLocations

Unnamed: 0,attndDate,attndLocation
0,2023-11-17,"Anuradhapura, Sri Lanka"
1,2023-11-17,"Avissawella, Sri Lanka"
2,2023-11-17,"Badulla, Sri Lanka"
3,2023-11-17,"Bandaragama, Sri Lanka"
4,2023-11-17,"Bandarawela, Sri Lanka"
...,...,...
163,2023-11-20,"Nugegoda, Sri Lanka"
164,2023-11-20,"Piliyandala, Sri Lanka"
165,2023-11-20,"Rajagiriya, Sri Lanka"
166,2023-11-20,"Ratnapura, Sri Lanka"


In [42]:
#count the number of attendees for each location from the Colombo Motor Show Attendance dataset
attendees_count = colombo_motor_show_attendance.groupby(['attndDate', 'attndLocation']).size().reset_index(name='attendeesCount')

attendees_count

Unnamed: 0,attndDate,attndLocation,attendeesCount
0,2023-11-17,"Bandaragama, Sri Lanka",1
1,2023-11-17,"Colombo, Sri Lanka",1
2,2023-11-17,"Gampaha, Sri Lanka",1
3,2023-11-17,"Minuwangoda, Sri Lanka",1
4,2023-11-17,"Moratuwa, Sri Lanka",1
...,...,...,...
59,2023-11-20,"Kurunegala, Sri Lanka",1
60,2023-11-20,"Madampe, Sri Lanka",1
61,2023-11-20,"Mirigama, Sri Lanka",1
62,2023-11-20,"Negombo, Sri Lanka",2


In [43]:
#Convert attndDate to datetime for %Y-%m-%d format in attendees_count and AttendeesLocations dataframes
attendees_count['attndDate'] = pd.to_datetime(attendees_count['attndDate']).dt.strftime('%Y-%m-%d')
AttendeesLocations['attndDate'] = pd.to_datetime(AttendeesLocations['attndDate']).dt.strftime('%Y-%m-%d')

#Merge AttendeesLocations and attendees_count dataframes
AttendeesLocations = AttendeesLocations.merge(attendees_count, how='left', on=['attndDate', 'attndLocation'])

#Replace NaN values with 0
AttendeesLocations['attendeesCount'].fillna(0, inplace=True)

AttendeesLocations

Unnamed: 0,attndDate,attndLocation,attendeesCount
0,2023-11-17,"Anuradhapura, Sri Lanka",0.0
1,2023-11-17,"Avissawella, Sri Lanka",0.0
2,2023-11-17,"Badulla, Sri Lanka",0.0
3,2023-11-17,"Bandaragama, Sri Lanka",1.0
4,2023-11-17,"Bandarawela, Sri Lanka",0.0
...,...,...,...
163,2023-11-20,"Nugegoda, Sri Lanka",0.0
164,2023-11-20,"Piliyandala, Sri Lanka",0.0
165,2023-11-20,"Rajagiriya, Sri Lanka",0.0
166,2023-11-20,"Ratnapura, Sri Lanka",2.0


In [44]:
#Convert attndDate in AttendeesLocations and weather_climate_data to same format
AttendeesLocations['attndDate'] = pd.to_datetime(AttendeesLocations['attndDate']).dt.strftime('%m/%d/%Y')
weather_climate_data['attndDate'] = pd.to_datetime(weather_climate_data['attndDate']).dt.strftime('%m/%d/%Y')

#Left Join the AttendeesLocations and weather_climate_data dataframes
ModelData = AttendeesLocations.merge(weather_climate_data, how='left', on=['attndDate'])

#Drop eventLocation column
ModelData.drop('eventLocation', axis=1, inplace=True)

#Convert attndDate to datetime
ModelData['attndDate'] = pd.to_datetime(ModelData['attndDate'], format=date_format)

ModelData

Unnamed: 0,attndDate,attndLocation,attendeesCount,dhTemp (C),dlTemp (C),precipitation (mm),humidity,wind (km/h)
0,2023-11-17,"Anuradhapura, Sri Lanka",0.0,32,26,4.3,0.8684,5.3
1,2023-11-17,"Avissawella, Sri Lanka",0.0,32,26,4.3,0.8684,5.3
2,2023-11-17,"Badulla, Sri Lanka",0.0,32,26,4.3,0.8684,5.3
3,2023-11-17,"Bandaragama, Sri Lanka",1.0,32,26,4.3,0.8684,5.3
4,2023-11-17,"Bandarawela, Sri Lanka",0.0,32,26,4.3,0.8684,5.3
...,...,...,...,...,...,...,...,...
163,2023-11-20,"Nugegoda, Sri Lanka",0.0,31,24,2.7,0.8805,5.0
164,2023-11-20,"Piliyandala, Sri Lanka",0.0,31,24,2.7,0.8805,5.0
165,2023-11-20,"Rajagiriya, Sri Lanka",0.0,31,24,2.7,0.8805,5.0
166,2023-11-20,"Ratnapura, Sri Lanka",2.0,31,24,2.7,0.8805,5.0


# Data Modeling for Predictions

In [45]:
#Divide the ModelData into Train and Test sets as attndDate is a time series
train = ModelData[ModelData['attndDate'] < '2023-11-20']
test = ModelData[ModelData['attndDate'] >= '2023-11-20']

#Select the relevant columns from the model data for training
model_data_subset = train[['attndDate', 'attndLocation', 'attendeesCount', 'dhTemp (C)', 'dlTemp (C)', 'precipitation (mm)', 'humidity', 'wind (km/h)']]

#Create a dictionary to store the random forest models for each location
location_models = {}

#Iterate over each unique location in the model data
for location in model_data_subset['attndLocation'].unique():
    #Filter the data for the current location
    location_data = model_data_subset[model_data_subset['attndLocation'] == location]
    
    #Create the feature matrix X and target variable y
    X = location_data[['dhTemp (C)', 'dlTemp (C)', 'precipitation (mm)', 'humidity', 'wind (km/h)']]
    y = location_data['attendeesCount']
    
    #Fit a random forest model
    model = RandomForestRegressor()
    model.fit(X, y)
    
    #Store the model for the current location
    location_models[location] = model

#Predict the attendeeCount for the predictive_row data
predicted_attendee_counts = []
for index, row in test.iterrows():
    #Get the location from the test data
    location = row['attndLocation']
    
    #Get the corresponding model for the location
    model = location_models[location]
    
    #Create the feature matrix X for the current row
    X = row[['dhTemp (C)', 'dlTemp (C)', 'precipitation (mm)', 'humidity', 'wind (km/h)']].values.reshape(1, -1)
    
    #Predict the attendeeCount for the current row
    predicted_attendee_count = model.predict(X)[0]
    
    #Append the predicted attendeeCount to the list
    predicted_attendee_counts.append(predicted_attendee_count)

#Add the predicted_attendee_counts as a new column to the predictive_row data
test['predicted_attendeeCount'] = predicted_attendee_counts

#Convert the predicted_attendeeCount to an integer
test['predicted_attendeeCount'] = test['predicted_attendeeCount'].astype(int)

#Display the predictive_row data with the predicted attendeeCount
test


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomFo

Unnamed: 0,attndDate,attndLocation,attendeesCount,dhTemp (C),dlTemp (C),precipitation (mm),humidity,wind (km/h),predicted_attendeeCount
126,2023-11-20,"Anuradhapura, Sri Lanka",0.0,31,24,2.7,0.8805,5.0,0
127,2023-11-20,"Avissawella, Sri Lanka",1.0,31,24,2.7,0.8805,5.0,0
128,2023-11-20,"Badulla, Sri Lanka",0.0,31,24,2.7,0.8805,5.0,0
129,2023-11-20,"Bandaragama, Sri Lanka",0.0,31,24,2.7,0.8805,5.0,0
130,2023-11-20,"Bandarawela, Sri Lanka",0.0,31,24,2.7,0.8805,5.0,0
131,2023-11-20,"Biyagama, Sri Lanka",0.0,31,24,2.7,0.8805,5.0,0
132,2023-11-20,"Chilaw, Sri Lanka",0.0,31,24,2.7,0.8805,5.0,0
133,2023-11-20,"Colombo, Sri Lanka",1.0,31,24,2.7,0.8805,5.0,3
134,2023-11-20,"Dehiwala, Sri Lanka",0.0,31,24,2.7,0.8805,5.0,0
135,2023-11-20,"Galle, Sri Lanka",3.0,31,24,2.7,0.8805,5.0,0


In [46]:
#Calculate the mean absolute error for the predictions
mae = mean_absolute_error(test['attendeesCount'], test['predicted_attendeeCount'])
print('Mean Absolute Error:', mae)

#Calculate the r2 score for the predictions
r2 = r2_score(test['attendeesCount'], test['predicted_attendeeCount'])
print('R2 Score:', r2)

Mean Absolute Error: 0.6190476190476191
R2 Score: -0.13076923076923075


In [47]:
#convert attndDate to datetime in weather_climate_data to datetime
weather_climate_data['attndDate'] = pd.to_datetime(weather_climate_data['attndDate'], format=date_format)

#Get attndDate 2023-11-21 as the predictive row
prediction_data = weather_climate_data[weather_climate_data['attndDate'] == '2023-11-21']

#Remove eventLocation column
prediction_data = prediction_data.drop(['eventLocation'], axis=1)

#Get all unique attndlocation from the Locations dataframe
all_locations = AttendeesLocations['attndLocation'].unique()

#Duplicate the prediction_data row for each location
prediction_data = pd.concat([prediction_data] * len(all_locations), ignore_index=True)

#Add the Location column to the prediction_data dataframe
prediction_data['attndLocation'] = all_locations

prediction_data

Unnamed: 0,attndDate,dhTemp (C),dlTemp (C),precipitation (mm),humidity,wind (km/h),attndLocation
0,2023-11-21,32,25,5.6,0.8893,6.3,"Anuradhapura, Sri Lanka"
1,2023-11-21,32,25,5.6,0.8893,6.3,"Avissawella, Sri Lanka"
2,2023-11-21,32,25,5.6,0.8893,6.3,"Badulla, Sri Lanka"
3,2023-11-21,32,25,5.6,0.8893,6.3,"Bandaragama, Sri Lanka"
4,2023-11-21,32,25,5.6,0.8893,6.3,"Bandarawela, Sri Lanka"
5,2023-11-21,32,25,5.6,0.8893,6.3,"Biyagama, Sri Lanka"
6,2023-11-21,32,25,5.6,0.8893,6.3,"Chilaw, Sri Lanka"
7,2023-11-21,32,25,5.6,0.8893,6.3,"Colombo, Sri Lanka"
8,2023-11-21,32,25,5.6,0.8893,6.3,"Dehiwala, Sri Lanka"
9,2023-11-21,32,25,5.6,0.8893,6.3,"Galle, Sri Lanka"


In [48]:
#Select the relevant columns from the model data
model_data_subset = ModelData[['attndDate', 'attndLocation', 'attendeesCount', 'dhTemp (C)', 'dlTemp (C)', 'precipitation (mm)', 'humidity', 'wind (km/h)']]

#Create a dictionary to store the random forest models for each location
location_models = {}

#Iterate over each unique location in the model data
for location in model_data_subset['attndLocation'].unique():
    #Filter the data for the current location
    location_data = model_data_subset[model_data_subset['attndLocation'] == location]
    
    #Create the feature matrix X and target variable y
    X = location_data[['dhTemp (C)', 'dlTemp (C)', 'precipitation (mm)', 'humidity', 'wind (km/h)']]
    y = location_data['attendeesCount']
    
    #Fit a random forest model
    model = RandomForestRegressor()
    model.fit(X, y)
    
    #Store the model for the current location
    location_models[location] = model

In [49]:
#Predict the attendeeCount for the predictive_row data
predicted_attendee_counts = []
for index, row in prediction_data.iterrows():
    #Get the location from the predictive_row data
    location = row['attndLocation']
    
    #Get the corresponding model for the location
    model = location_models[location]
    
    #Create the feature matrix X for the current row
    X = row[['dhTemp (C)', 'dlTemp (C)', 'precipitation (mm)', 'humidity', 'wind (km/h)']].values.reshape(1, -1)
    
    #Predict the attendeeCount for the current row
    predicted_attendee_count = model.predict(X)[0]
    
    #Append the predicted attendeeCount to the list
    predicted_attendee_counts.append(predicted_attendee_count)

#Add the predicted_attendee_counts as a new column to the predictive_row data
prediction_data['predicted_attendeeCount'] = predicted_attendee_counts

#Convert the predicted_attendeeCount to an integer
prediction_data['predicted_attendeeCount'] = prediction_data['predicted_attendeeCount'].astype(int)

#Display the predictive_row data with the predicted attendeeCount
prediction_data


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomForestRegressor was fitted with feature names


X does not have valid feature names, but RandomFo

Unnamed: 0,attndDate,dhTemp (C),dlTemp (C),precipitation (mm),humidity,wind (km/h),attndLocation,predicted_attendeeCount
0,2023-11-21,32,25,5.6,0.8893,6.3,"Anuradhapura, Sri Lanka",1
1,2023-11-21,32,25,5.6,0.8893,6.3,"Avissawella, Sri Lanka",0
2,2023-11-21,32,25,5.6,0.8893,6.3,"Badulla, Sri Lanka",0
3,2023-11-21,32,25,5.6,0.8893,6.3,"Bandaragama, Sri Lanka",0
4,2023-11-21,32,25,5.6,0.8893,6.3,"Bandarawela, Sri Lanka",0
5,2023-11-21,32,25,5.6,0.8893,6.3,"Biyagama, Sri Lanka",0
6,2023-11-21,32,25,5.6,0.8893,6.3,"Chilaw, Sri Lanka",0
7,2023-11-21,32,25,5.6,0.8893,6.3,"Colombo, Sri Lanka",3
8,2023-11-21,32,25,5.6,0.8893,6.3,"Dehiwala, Sri Lanka",0
9,2023-11-21,32,25,5.6,0.8893,6.3,"Galle, Sri Lanka",0


In [50]:
#Rename predicted_attendeeCount to attendeesCount
prediction_data.rename(columns={'predicted_attendeeCount': 'attendeesCount'}, inplace=True)

#Get only attndDate, attndLocation and attendeesCount columns
prediction_data = prediction_data[['attndDate', 'attndLocation', 'attendeesCount']]

prediction_data

Unnamed: 0,attndDate,attndLocation,attendeesCount
0,2023-11-21,"Anuradhapura, Sri Lanka",1
1,2023-11-21,"Avissawella, Sri Lanka",0
2,2023-11-21,"Badulla, Sri Lanka",0
3,2023-11-21,"Bandaragama, Sri Lanka",0
4,2023-11-21,"Bandarawela, Sri Lanka",0
5,2023-11-21,"Biyagama, Sri Lanka",0
6,2023-11-21,"Chilaw, Sri Lanka",0
7,2023-11-21,"Colombo, Sri Lanka",3
8,2023-11-21,"Dehiwala, Sri Lanka",0
9,2023-11-21,"Galle, Sri Lanka",0


In [51]:
#Add the prediction_data to the AttendeesLocations dataframe
NewAttendeesLocations = pd.concat([AttendeesLocations, prediction_data], ignore_index=True)

#Convert attndDate to datetime for %Y-%m-%d format
NewAttendeesLocations['attndDate'] = pd.to_datetime(NewAttendeesLocations['attndDate']).dt.strftime('%Y-%m-%d')

NewAttendeesLocations

Unnamed: 0,attndDate,attndLocation,attendeesCount
0,2023-11-17,"Anuradhapura, Sri Lanka",0.0
1,2023-11-17,"Avissawella, Sri Lanka",0.0
2,2023-11-17,"Badulla, Sri Lanka",0.0
3,2023-11-17,"Bandaragama, Sri Lanka",1.0
4,2023-11-17,"Bandarawela, Sri Lanka",0.0
...,...,...,...
205,2023-11-21,"Nugegoda, Sri Lanka",0.0
206,2023-11-21,"Piliyandala, Sri Lanka",0.0
207,2023-11-21,"Rajagiriya, Sri Lanka",0.0
208,2023-11-21,"Ratnapura, Sri Lanka",0.0


In [52]:
weather_climate_data

Unnamed: 0,eventLocation,attndDate,dhTemp (C),dlTemp (C),precipitation (mm),humidity,wind (km/h)
0,"BMICH, Colombo",2023-11-17,32,26,4.3,0.8684,5.3
1,"BMICH, Colombo",2023-11-18,31,25,6.7,0.8834,6.3
2,"BMICH, Colombo",2023-11-19,31,24,4.1,0.8937,6.9
3,"BMICH, Colombo",2023-11-20,31,24,2.7,0.8805,5.0
4,"BMICH, Colombo",2023-11-21,32,25,5.6,0.8893,6.3


In [53]:
#Daily High Temperature & Daily Lower Temperature Variation
temperature_graph = px.line(weather_climate_data, x="attndDate", y=["dlTemp (C)","dhTemp (C)"], title='Daily High & Daily Lower Temperature Variation')
temperature_graph.update_xaxes(title_text='Date')
temperature_graph.update_yaxes(title_text='Temperature (C)')
temperature_graph.update_layout(legend_title_text='Temperature')
temperature_graph.for_each_trace(lambda t: t.update(name=t.name.replace("dhTemp (C)", "Daily High Temperature")))
temperature_graph.for_each_trace(lambda t: t.update(name=t.name.replace("dlTemp (C)", "Daily Lower Temperature")))
temperature_graph.update_layout(template='plotly_dark')
temperature_graph.show()

#Save the graph as a html file
temperature_graph.write_html(os.path.join(assests_dir, 'temperature_graph.html'))

#get the html for the temperature_graph
temperature_graph_html = temperature_graph.to_html()

#path
temperature_graph_path = os.path.join(assests_dir, 'temperature_graph.html')

In [54]:
# Daily Precipitation Variation
precipitation_graph = px.bar(weather_climate_data, x="attndDate", y="precipitation (mm)", title='Daily Precipitation Variation')
precipitation_graph.update_xaxes(title_text='Date')
precipitation_graph.update_yaxes(title_text='Precipitation (mm)')
precipitation_graph.update_layout(template='plotly_dark')
precipitation_graph.show()

#Save the graph as a html file
precipitation_graph.write_html(os.path.join(assests_dir, 'precipitation_graph.html'))

#get the html for the precipitation_graph
precipitation_graph_html = precipitation_graph.to_html(full_html=False, include_plotlyjs='cdn')

#path
precipitation_graph_path = os.path.join(assests_dir, 'precipitation_graph.html')

In [55]:
#Daily Humidity Variation
humidity_graph = px.line(weather_climate_data, x="attndDate", y="humidity", title='Daily Humidity Variation')
humidity_graph.update_traces(line_color='green')
humidity_graph.update_xaxes(title_text='Date')
humidity_graph.update_yaxes(title_text='Humidity')
humidity_graph.update_layout(template='plotly_dark')
humidity_graph.show()

#Save the graph as a html file
humidity_graph.write_html(os.path.join(assests_dir, 'humidity_graph.html'))

#get the html to humidity_graph
humidity_graph_html = humidity_graph.to_html()

#path
humidity_graph_path = os.path.join(assests_dir, 'humidity_graph.html')

In [56]:
#Daily Wind Variation
wind_graph = px.line(weather_climate_data, x="attndDate", y="wind (km/h)", title='Daily Wind Variation')
wind_graph.update_traces(line_color='orange')
wind_graph.update_xaxes(title_text='Date')
wind_graph.update_yaxes(title_text='Wind (km/h)')
wind_graph.update_layout(template='plotly_dark')
wind_graph.show()

#Save the graph as a html file
wind_graph.write_html(os.path.join(assests_dir, 'wind_graph.html'))

#Get the html og wind_graph
wind_graph_html = wind_graph.to_html(full_html=False)

#path
wind_graph_path = os.path.join(assests_dir, 'wind_graph.html')

In [57]:
#Create a function to update the map based on the selected date
def update_map(date):
    #Recreate the map centered on Sri Lanka BMICH, Colombo
    map_srilanka = folium.Map(location=[6.9011072, 79.8736181], zoom_start=8.5)

    #Add a marker for the EventLocation
    folium.Marker(
        location=[event_location['lat'].values[0], event_location['lng'].values[0]],
        popup='Event Location: BMICH, Colombo',
        icon=folium.Icon(color='black', icon='star')
    ).add_to(map_srilanka)

    #Filter the data based on the selected date
    filtered_data = NewAttendeesLocations[NewAttendeesLocations['attndDate'] == date]

    #Create a list of locations and weighted attendee counts
    heat_data = []
    for index, row in filtered_data.iterrows():
        location = attendee_locations[attendee_locations['location'] == row['attndLocation']]
        heat_data.append([location['lat'].values[0], location['lng'].values[0], row['attendeesCount']])

    #Add the heat map layer
    HeatMap(heat_data).add_to(map_srilanka)

    #Display the updated map
    display(map_srilanka)

In [58]:
#Create a tab widget
tab = widgets.Tab()

#Create a header title for the application
header_title = widgets.HTML('<h1>Colombo Motor Show 2023 Attendance Prediction</h1>')

#Create a list to store the tab titles and contents
tab_titles = ['Map', 'Weather & Climate']
tab_contents = [
    #Create the content for the first tab (Map)
    widgets.HBox([
        #Create the left side with the table
        widgets.VBox([
            widgets.Label('Select Date:', style={'font-weight': 'bold', 'font-size': '14px'}),
            widgets.SelectionSlider(
                options=NewAttendeesLocations['attndDate'].unique(),
                continuous_update=False,
                layout=widgets.Layout(width='90%', height='auto'),
                style={'description_width': 'initial'}
            ),
            widgets.Output(layout=widgets.Layout(height='200px'))
        ], layout=widgets.Layout(width='50%', border='1px solid #ccc', padding='10px')),
        #Create the right side with the map
        widgets.VBox([
            widgets.Output(layout=widgets.Layout(height='400px'))
        ], layout=widgets.Layout(width='70%', border='1px solid #ccc', padding='10px'))
    ], layout=widgets.Layout(justify_content='space-between')),
    #Create the content for the second tab (Weather & Climate)
    widgets.HBox([
        widgets.VBox([
            widgets.Label('Select Graph:', style={'font-weight': 'bold', 'font-size': '14px'}),
            widgets.Dropdown(
                options=['Temperature', 'Precipitation', 'Humidity', 'Wind'],
                value='Temperature',
                layout=widgets.Layout(width='50%', height='auto'),
                style={'description_width': 'initial'}
            ),
            widgets.Output(layout=widgets.Layout(height='400px')),
        ], layout=widgets.Layout(width='100%', padding='20px'))
    ], layout=widgets.Layout(width='100%', border='1px solid #ccc', padding='10px'))
]

In [59]:
#Set the tab titles and contents
tab.children = tab_contents
for i, title in enumerate(tab_titles):
    tab.set_title(i, title)

#Define a function to update the map and table outputs
def update_map_and_table_output(change):
    with tab.children[0].children[0].children[2]:
        #Clear the previous table output
        clear_output(wait=True)

        #Get the selected date
        date = change['new']

        #Filter the data based on the selected date
        filtered_data = NewAttendeesLocations[NewAttendeesLocations['attndDate'] == date]

        #Rename the columns
        filtered_data = filtered_data.rename(
            columns={'attndDate': 'Attended Date', 'attndLocation': 'Attended From', 'attendeesCount': 'No of Attendees'})

        #Display the filtered data as a table
        display(filtered_data)

    with tab.children[0].children[1].children[0]:
        #Clear the previous map output
        clear_output(wait=True)

        #Call the update_map function with the selected date
        update_map(date)

#Define a function to update the selected graph based on dropdown value
def update_selected_graph(change):
    with tab.children[1].children[0].children[2]:
        #Clear the previous graph output
        clear_output(wait=True)

        #Get the selected graph title
        selected_graph = change['new']

        #Display the corresponding graph
        if selected_graph == 'Temperature':
            display(go.FigureWidget(temperature_graph))
        elif selected_graph == 'Precipitation':
            display(go.FigureWidget(precipitation_graph))
        elif selected_graph == 'Humidity':
            display(go.FigureWidget(humidity_graph))
        elif selected_graph == 'Wind':
            display(go.FigureWidget(wind_graph))

In [60]:
#Register the functions as callbacks
tab.children[0].children[0].children[1].observe(update_map_and_table_output, names='value')
tab.children[1].children[0].children[1].observe(update_selected_graph, names='value')

In [61]:
#Apply CSS styling to the tab widget and its children
tab.add_class('custom-tab')
tab.set_title(0, 'Map')
tab.set_title(1, 'Weather & Climate')
tab.children[0].children[0].add_class('custom-left-side')
tab.children[0].children[1].add_class('custom-right-side')
tab.children[1].add_class('custom-content')

#Create a VBox to hold the header title and the tab widget
app_layout = widgets.VBox([header_title, tab])

#Apply CSS styling to the application layout
app_layout.add_class('custom-app-layout')

#Apply CSS styling to the application layout
html_content = '''
<style>
.custom-tab .p-TabBar-tab {
    background-color: #f0f0f0;
    border: 1px solid #ccc;
    border-radius: 5px;
    padding: 5px 10px;
    margin-right: 5px;
    font-weight: bold;
    font-size: 14px;
    font-family: Arial, sans-serif;  /* Use Arial font family */
}

.custom-tab .p-TabBar-tab.p-mod-selected {
    background-color: #fff;
    border-bottom-color: #fff;
}

.custom-left-side {
    background-color: #f9f9f9;
    border: 1px solid #ccc;
    border-radius: 5px;
    padding: 10px;
    font-weight: italic;  /* Make font bold */
    font-size: 13px;
    font-family: Arial, sans-serif;  /* Use Arial font family */
}

.custom-right-side {
    background-color: #f9f9f9;
    border: 1px solid #ccc;
    border-radius: 5px;
    padding: 10px;
    font-weight: bold;  /* Make font bold */
    font-size: 14px;
    font-family: Arial, sans-serif;  /* Use Arial font family */
}

.custom-content {
    background-color: #f9f9f9;
    border: 1px solid #ccc;
    border-radius: 5px;
    padding: 10px;
    font-weight: bold;  /* Make font bold */
    font-size: 16px;
    font-family: Arial, sans-serif;  /* Use Arial font family */
}

.custom-left-side .widget-label {
    font-weight: bold;
    font-size: 14px;
    font-family: Arial, sans-serif;  /* Use Arial font family */
}

.custom-left-side .widget-slider {
    width: 90%;
    height: auto;
}

.custom-left-side .widget-output {
    height: 200px;
}

.custom-app-layout {
    padding: 20px;
    background-color: #f9f9f9;
    border: 10px solid #ccc;
    border-radius: 5px;
    font-family: Arial, sans-serif;  /* Use Arial font family */
}
</style>
'''

#Display the CSS styling
display(HTML(html_content))

#Display the application layout
display(app_layout)

VBox(children=(HTML(value='<h1>Colombo Motor Show 2023 Attendance Prediction</h1>'), Tab(children=(HBox(childr…