# Traffic Dataset
### Akash Shanmugaraj

In [2]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import random
import datetime
import calendar

df = pd.read_csv('traffic.csv')


In [3]:
df['DateTime']
date = []
time = []

for datetimeentry in df['DateTime']:
    # date.append(datetimeentry.split(' ')[0])
    d,t = datetimeentry.split(' ')
    date.append(d)
    time.append(t)

# print(l)

In [4]:
def findDay(date):
    born = datetime.datetime.strptime(date, '%Y-%m-%d').weekday()
    return (calendar.day_name[born])

day = []
for dateentry in date:
    day.append(findDay(dateentry))

df['Day'] = day
df['Time'] = time
print(df.head())

# save the dataset
# df.to_csv('traffic.csv', index=False)

              DateTime  Junction  Vehicles           ID     Day      Time
0  2015-11-01 00:00:00         1        15  20151101001  Sunday  00:00:00
1  2015-11-01 01:00:00         1        13  20151101011  Sunday  01:00:00
2  2015-11-01 02:00:00         1        10  20151101021  Sunday  02:00:00
3  2015-11-01 03:00:00         1         7  20151101031  Sunday  03:00:00
4  2015-11-01 04:00:00         1         9  20151101041  Sunday  04:00:00


In [5]:
# generate random values for the weather
weather = []
for i in range(len(df)):
    weather.append(random.choice(['Sunny', 'Rainy', 'Cloudy', 'Snowy']))

df['Weather'] = weather

### Simulate Public Holiday

In [6]:
public_holiday = []
visited = dict()
for dateentry in date:
    if dateentry in visited:
        public_holiday.append(visited[dateentry])
        continue
    if random.random() > 0.95:
        public_holiday.append(1)
        visited[dateentry] = 1
    else:
        public_holiday.append(0)
        visited[dateentry] = 0

df['PublicHoliday'] = public_holiday


### Encode Date/Day

In [7]:
label_encoder = LabelEncoder()

day_encoded = label_encoder.fit_transform(df['Day'])

df['Day_Encoded'] = day_encoded

print(df[['Day', 'Day_Encoded']].head())

day_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Day mapping:", day_mapping)

      Day  Day_Encoded
0  Sunday            3
1  Sunday            3
2  Sunday            3
3  Sunday            3
4  Sunday            3
Day mapping: {'Friday': 0, 'Monday': 1, 'Saturday': 2, 'Sunday': 3, 'Thursday': 4, 'Tuesday': 5, 'Wednesday': 6}


### Encode Weather

In [8]:

weather_encoded = label_encoder.fit_transform(df['Weather'])

df['Weather_Encoded'] = weather_encoded

print(df[['Weather', 'Weather_Encoded']].head())

weather_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Weather mapping:", weather_mapping)

  Weather  Weather_Encoded
0   Rainy                1
1  Cloudy                0
2   Sunny                3
3  Cloudy                0
4   Snowy                2
Weather mapping: {'Cloudy': 0, 'Rainy': 1, 'Snowy': 2, 'Sunny': 3}


### Encode Time

In [9]:
time_encoded = label_encoder.fit_transform(df['Time'])

df['Time_Encoded'] = time_encoded

print(df[['Time', 'Time_Encoded']].head())

time_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
print("Time mapping:", time_mapping)

       Time  Time_Encoded
0  00:00:00             0
1  01:00:00             1
2  02:00:00             2
3  03:00:00             3
4  04:00:00             4
Time mapping: {'00:00:00': 0, '01:00:00': 1, '02:00:00': 2, '03:00:00': 3, '04:00:00': 4, '05:00:00': 5, '06:00:00': 6, '07:00:00': 7, '08:00:00': 8, '09:00:00': 9, '10:00:00': 10, '11:00:00': 11, '12:00:00': 12, '13:00:00': 13, '14:00:00': 14, '15:00:00': 15, '16:00:00': 16, '17:00:00': 17, '18:00:00': 18, '19:00:00': 19, '20:00:00': 20, '21:00:00': 21, '22:00:00': 22, '23:00:00': 23}


In [10]:
df.head()

Unnamed: 0,DateTime,Junction,Vehicles,ID,Day,Time,Weather,PublicHoliday,Day_Encoded,Weather_Encoded,Time_Encoded
0,2015-11-01 00:00:00,1,15,20151101001,Sunday,00:00:00,Rainy,0,3,1,0
1,2015-11-01 01:00:00,1,13,20151101011,Sunday,01:00:00,Cloudy,0,3,0,1
2,2015-11-01 02:00:00,1,10,20151101021,Sunday,02:00:00,Sunny,0,3,3,2
3,2015-11-01 03:00:00,1,7,20151101031,Sunday,03:00:00,Cloudy,0,3,0,3
4,2015-11-01 04:00:00,1,9,20151101041,Sunday,04:00:00,Snowy,0,3,2,4


In [None]:
c = df["PublicHoliday"].value_counts()
print(c)

PublicHoliday
0    45648
1     2472
Name: count, dtype: int64


In [None]:
from sklearn.model_selection import train_test_split 
X = df.drop(columns=['DateTime', 'ID', 'Day', 'Time', 'Weather', 'Vehicles', 'PublicHoliday', 'Weather_Encoded, '])
y = df['Vehicles']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [21]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Transform the features to polynomial features
poly = PolynomialFeatures(degree=5) # You can change the degree of the polynomial
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Initialize the model
polynomial_regressor = LinearRegression()

# Train the model
polynomial_regressor.fit(X_train_poly, y_train)

# Make predictions
y_pred_poly = polynomial_regressor.predict(X_test_poly)

# Evaluate the model
mse_poly = mean_squared_error(y_test, y_pred_poly)
r2_poly = r2_score(y_test, y_pred_poly)

print(f"Mean Squared Error (Polynomial Regression): {mse_poly}")
print(f"R2 Score (Polynomial Regression): {r2_poly}")

# Calculate accuracy
accuracy_poly = polynomial_regressor.score(X_test_poly, y_test)
print(f"Accuracy (Polynomial Regression): {accuracy_poly}")

Mean Squared Error (Polynomial Regression): 154.4924944791372
R2 Score (Polynomial Regression): 0.6510342336782202
Accuracy (Polynomial Regression): 0.6510342336782202
