In [1]:
import pandas as pd
import numpy as np

# Load the EV charging sessions dataset
df = pd.read_csv('ev_charging_sessions.csv')

print("EV Charging Sessions Dataset Loaded Successfully!")
print(f"\nDataset shape: {df.shape}")
print(f"Columns: {list(df.columns)}")
print("\nFirst 5 records:")
print(df.head())
print("\nData types:")
print(df.dtypes)
print("\nBasic statistics:")
print(df.describe())

EV Charging Sessions Dataset Loaded Successfully!

Dataset shape: (3500, 10)
Columns: ['session_id', 'user_id', 'vehicle_id', 'station_id', 'start_time', 'end_time', 'duration_min', 'energy_kWh', 'session_day', 'session_type']

First 5 records:
  session_id user_id vehicle_id station_id           start_time  \
0     CS0001    U339       V347       S091  2024-11-11 12:09:00   
1     CS0002    U286       V463       S025  2024-11-10 19:51:00   
2     CS0003    U092       V419       S007  2024-11-26 18:46:00   
3     CS0004    U369       V070       S008  2024-11-28 19:53:00   
4     CS0005    U185       V298       S037  2024-11-27 13:09:00   

              end_time  duration_min  energy_kWh session_day session_type  
0  2024-11-11 13:26:00            77       26.87     Weekday   Occasional  
1  2024-11-10 21:28:00            97       67.47     Weekend    Emergency  
2  2024-11-26 20:43:00           117       60.16     Weekend      Regular  
3  2024-11-28 21:42:00           109       39.19

# Data Quality Check

In [2]:
# Check for missing values
print("Missing values per column:")
print(df.isnull().sum())
print(f"\nTotal missing values: {df.isnull().sum().sum()}")

# Check for duplicates
print(f"\nDuplicate rows: {df.duplicated().sum()}")

# Data info
print("\nDataset Info:")
df.info()


Missing values per column:
session_id      0
user_id         0
vehicle_id      0
station_id      0
start_time      0
end_time        0
duration_min    0
energy_kWh      0
session_day     0
session_type    0
dtype: int64

Total missing values: 0

Duplicate rows: 0

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3500 entries, 0 to 3499
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   session_id    3500 non-null   object 
 1   user_id       3500 non-null   object 
 2   vehicle_id    3500 non-null   object 
 3   station_id    3500 non-null   object 
 4   start_time    3500 non-null   object 
 5   end_time      3500 non-null   object 
 6   duration_min  3500 non-null   int64  
 7   energy_kWh    3500 non-null   float64
 8   session_day   3500 non-null   object 
 9   session_type  3500 non-null   object 
dtypes: float64(1), int64(1), object(8)
memory usage: 273.6+ KB


# Dataset Summary and Unique Values

In [3]:
# Unique values in categorical columns
print("Unique values in categorical columns:")
print(f"\nSession Types: {df['session_type'].unique()}")
print(f"Session Type Counts:\n{df['session_type'].value_counts()}")

print(f"\nSession Days: {df['session_day'].unique()}")
print(f"Session Day Counts:\n{df['session_day'].value_counts()}")

print(f"\nUnique Users: {df['user_id'].nunique()}")
print(f"Unique Vehicles: {df['vehicle_id'].nunique()}")
print(f"Unique Stations: {df['station_id'].nunique()}")


Unique values in categorical columns:

Session Types: ['Occasional' 'Emergency' 'Regular']
Session Type Counts:
session_type
Emergency     1198
Occasional    1152
Regular       1150
Name: count, dtype: int64

Session Days: ['Weekday' 'Weekend']
Session Day Counts:
session_day
Weekday    1760
Weekend    1740
Name: count, dtype: int64

Unique Users: 500
Unique Vehicles: 499
Unique Stations: 100


In [4]:
# Display the full dataframe
df

Unnamed: 0,session_id,user_id,vehicle_id,station_id,start_time,end_time,duration_min,energy_kWh,session_day,session_type
0,CS0001,U339,V347,S091,2024-11-11 12:09:00,2024-11-11 13:26:00,77,26.87,Weekday,Occasional
1,CS0002,U286,V463,S025,2024-11-10 19:51:00,2024-11-10 21:28:00,97,67.47,Weekend,Emergency
2,CS0003,U092,V419,S007,2024-11-26 18:46:00,2024-11-26 20:43:00,117,60.16,Weekend,Regular
3,CS0004,U369,V070,S008,2024-11-28 19:53:00,2024-11-28 21:42:00,109,39.19,Weekday,Emergency
4,CS0005,U185,V298,S037,2024-11-27 13:09:00,2024-11-27 14:28:00,79,61.71,Weekend,Occasional
...,...,...,...,...,...,...,...,...,...,...
3495,CS3496,U357,V387,S094,2024-11-12 19:19:00,2024-11-12 21:00:00,101,72.29,Weekday,Regular
3496,CS3497,U328,V308,S076,2024-11-20 18:50:00,2024-11-20 20:04:00,74,57.34,Weekend,Emergency
3497,CS3498,U358,V110,S011,2024-11-05 13:46:00,2024-11-05 15:33:00,107,29.87,Weekend,Regular
3498,CS3499,U396,V475,S092,2024-11-25 22:08:00,2024-11-25 23:20:00,72,30.67,Weekday,Emergency


In [5]:
# Verify no missing values (dataset is clean)
print("Checking for any missing values:")
print(df.isnull().sum())
print(f"\nDataset is clean: {df.isnull().sum().sum() == 0}")

Checking for any missing values:
session_id      0
user_id         0
vehicle_id      0
station_id      0
start_time      0
end_time        0
duration_min    0
energy_kWh      0
session_day     0
session_type    0
dtype: int64

Dataset is clean: True
