In [None]:
import Graphs
import numpy as np
import pandas as pd
import math
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Read the CSV and keep only the rows that have no missing values.
data_name = 'ev_charging_patterns.csv'
data = pd.read_csv(data_name).dropna()
data

In [None]:
# Row count per vehicle model (value_counts sorts most frequent first).
dist_models = (
    data['Vehicle Model']
    .value_counts()
)
dist_models

In [None]:
# Row count per charging station location (city).
dist_charging_loc = (
    data['Charging Station Location']
    .value_counts()
)
dist_charging_loc

In [None]:
# How many rows reference each charging station ID.
unique_chargers = (
    data['Charging Station ID']
    .value_counts()
)
unique_chargers

In [None]:
# For every city, the list of its temperature readings converted from
# Celsius to Fahrenheit (F = C * 9/5 + 32).
locaiton_temperatures = (
    data.groupby('Charging Station Location')['Temperature (°C)']
    .apply(lambda celsius: (celsius * 9 / 5 + 32).tolist())
)
locaiton_temperatures

In [None]:
# Variance of the Fahrenheit readings per city. np.var uses ddof=0 by
# default, i.e. this is the population variance.
locaiton_temperatures_var = (
    data.groupby('Charging Station Location')['Temperature (°C)']
    .apply(lambda celsius: np.var((celsius * 9 / 5 + 32).to_numpy()))
)
locaiton_temperatures_var

Compares the min, mean, and max temperature of each city.

In [None]:
# Plot the per-city Fahrenheit lists with the project's grouped-bar helper;
# the Series index supplies the city names.
Graphs.grouped_bar_graph_var(
    locaiton_temperatures,
    list(locaiton_temperatures.index),
    'City',
    'Temperature (°F)',
    10,
    'Temperature by City',
)

In [None]:
def _temperature_band(celsius):
    """Label a Celsius reading: 'Hot' above 30, 'Normal' above 15, else 'Cold'."""
    if celsius > 30:
        return 'Hot'
    if celsius > 15:
        return 'Normal'
    return 'Cold'


# Per city, the list of band labels for its temperature readings.
encoded_location_temp = (
    data.groupby('Charging Station Location')['Temperature (°C)']
    .apply(lambda temps: [_temperature_band(t) for t in temps])
)

encoded_location_temp

In [None]:
# Print, for each city, how often each temperature band occurs.
for city, bands in encoded_location_temp.items():
    print(city)
    print(pd.Series(bands).value_counts())

In [None]:
# Per city, the list of recorded charging rates (kW).
locaiton_charging_rates = (
    data.groupby('Charging Station Location')['Charging Rate (kW)']
    .apply(list)
)
locaiton_charging_rates

Generates graphs of the charging rate for a given city

In [None]:
# Plot the per-city charging-rate lists with the project's grouped-bar helper;
# the Series index supplies the city names.
Graphs.grouped_bar_graph_var(
    locaiton_charging_rates,
    list(locaiton_charging_rates.index),
    'City',
    'Charging Rates(kW)',
    10,
    'Charging Rate Per City',
)

In [None]:
# One bar per recorded charging rate in the selected city.
loc = 'Chicago'
charging_data = locaiton_charging_rates[loc]
n_bars = len(charging_data)

# Repeat the palette as often as needed so every bar gets a colour.
colors = ['red', 'blue', 'green', 'orange', 'purple', 'lime']
bar_colors = [colors[k % len(colors)] for k in range(n_bars)]

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(range(n_bars), charging_data, color=bar_colors, width=2)
ax.set_title(f'Charging Rates in {loc}')
ax.set_xlabel('Charging Stations')
ax.set_ylabel('Charging Rate (kW)')
ax.set_xticks([])  # individual bars are not labelled
fig.tight_layout()

plt.show()

Generates a bar graph comparing the mean, low, and high of each city

In [None]:
# Per city, the list of charger types that appear in its rows.
locaiton_charger_types = (
    data.groupby('Charging Station Location')['Charger Type']
    .apply(list)
)
locaiton_charger_types

Generates a bar graph of each charger type in each city

In [None]:
# Bar chart: for every city, how many rows use each charger type.
# The charger types are listed once here so the group size is not hard-coded.
charger_levels = ['Level 1', 'Level 2', 'DC Fast Charger']
group_size = len(charger_levels)

cities = list(locaiton_charger_types.keys())
colors = ['red', 'blue', 'green', 'orange', 'purple']

# Flat, city-major list of counts: [city0-L1, city0-L2, city0-DC, city1-L1, ...].
compare_charging_data = []
for city in cities:
    vals = Counter(locaiton_charger_types[city])
    # Counter returns 0 for a charger type that a city never uses.
    compare_charging_data.extend(vals[level] for level in charger_levels)

# One label per bar; not needed by the plot below, retained for compatibility.
city_labels = [city for city in cities for _ in charger_levels]

plt.figure(figsize=(8, 6))

bar_width = 0.8
positions = np.arange(len(compare_charging_data))

for i, city in enumerate(cities):
    group = slice(i * group_size, (i + 1) * group_size)
    # Cycle through the palette so more cities than colours cannot raise IndexError.
    plt.bar(positions[group], compare_charging_data[group],
            color=colors[i % len(colors)], edgecolor='black', width=bar_width)

plt.title('Charger Type in each City (Level 1, Level 2, DC Fast)')
plt.xlabel('City')
plt.ylabel('Count')

# Put each city label under the centre of its group of bars.
tick_positions = positions[::group_size] + (group_size - 1) / 2
plt.xticks(ticks=tick_positions, labels=cities, rotation=0)
plt.tight_layout()
plt.show()

Generates a pie graph of each city's charger type distribution

In [None]:
# Pie chart of the charger types recorded in the selected city.
loc = 'Houston'
t_o_d = locaiton_charger_types[loc]
# The stray "f" before {loc} used to render the title as "... in fHouston".
Graphs.pie_graph(t_o_d, f'Charger Types in {loc}')

In [None]:
# Per (city, charger type) pair, the list of recorded charging rates (kW).
locaiton_charger_types_rates = (
    data.groupby(['Charging Station Location', 'Charger Type'])['Charging Rate (kW)']
    .apply(list)
)
locaiton_charger_types_rates

Generates a bar graph for each charging rate per charger type in a city

In [None]:
# Charging rates in the selected city, grouped by charger type.
loc = 'Houston'
rates_by_type = locaiton_charger_types_rates[loc]
Graphs.grouped_bar_graph_var(
    rates_by_type,
    list(rates_by_type.index),
    'Charging Types',
    'Charging Rates (kW)',
    5,
    f'Charging Rate by Type in {loc}',
)

In [None]:
# Per (city, charger type) pair, the list of recorded charging costs (USD).
locaiton_charger_types_costs = (
    data.groupby(['Charging Station Location', 'Charger Type'])['Charging Cost (USD)']
    .apply(list)
)
locaiton_charger_types_costs

Generates a bar graph for a city on the cost of charging using a charger type

In [None]:
# Charging costs in the selected city, grouped by charger type.
loc = 'Houston'
costs_by_type = locaiton_charger_types_costs[loc]
Graphs.grouped_bar_graph_var(
    costs_by_type,
    list(costs_by_type.index),
    'Charging Types',
    'Charging Cost (USD)',
    5,
    f'Charging Cost by Type in {loc}',
)

In [None]:
# Per city, the list of "Time of Day" values from its rows.
locaiton_time_of_day = (
    data.groupby('Charging Station Location')['Time of Day']
    .apply(list)
)
locaiton_time_of_day

Generates the time of day distribution pie graph for a given city

In [None]:
# Pie chart of the time-of-day values recorded in the selected city.
loc = 'New York'
t_o_d = locaiton_time_of_day.loc[loc]
chart_title = f'Time of Day for Charging in {loc}'
Graphs.pie_graph(t_o_d, chart_title)

In [None]:
# Per city, the list of "Day of Week" values from its rows.
locaiton_day_of_week = (
    data.groupby('Charging Station Location')['Day of Week']
    .apply(list)
)
locaiton_day_of_week

Generates a pie graph of the days of the week on which charging occurs


In [None]:
# Pie chart of the day-of-week values recorded in the selected city.
loc = 'Houston'
t_o_d = locaiton_day_of_week[loc]
# Title previously read "TIme Day of Week ..." (copy-paste residue).
Graphs.pie_graph(t_o_d, f'Day of Week for Charging in {loc}')

In [None]:
# Correlation heatmap over a hand-picked subset of columns.
cols = [
    'Charging Station Location',
    'Time of Day',
    'Day of Week',
    'Temperature (°C)',
    'Charger Type',
    'Charging Cost (USD)',
    'Charging Rate (kW)',
]
d = data.loc[:, cols].copy()

# The text columns are replaced by integer codes so corr() can process them;
# each fitted encoder is kept so the codes can be mapped back to labels.
text_cols = ('Charging Station Location', 'Time of Day', 'Day of Week', 'Charger Type')
labels = {}
for col in text_cols:
    le = LabelEncoder()
    d[col] = le.fit_transform(d[col])
    labels[col] = le

mat = d.corr()

fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(mat, annot=True, cmap='coolwarm', fmt=".1f", ax=ax)
ax.set_title("Correlation Matrix of Charging Data Features")
plt.show()

In [None]:
# Correlation heatmap over every column of the dataset.
df = data.copy()
categorical_cols = df.select_dtypes(include=['object']).columns

# Object-dtype columns are cast to str and integer-encoded so corr() can
# process them; the fitted encoders are stored per column.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    as_text = df[col].astype(str)
    df[col] = le.fit_transform(as_text)
    label_encoders[col] = le

mat = df.corr()

fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(mat, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5, ax=ax)
ax.set_title("Correlation Matrix of All Features")
plt.show()

In [None]:
# Derive two per-row metrics and drop rows whose unit price is an outlier.
enhanced_data = data.copy()

# Unit price in USD per kWh: cost divided by energy. (The ratio used to be
# energy / cost, i.e. kWh per USD, which contradicted the column name.)
# A zero energy reading yields +/-inf, which would make the mean/std below
# useless, so those values are treated as missing instead.
enhanced_data['cost_per_kwh'] = (
    enhanced_data['Charging Cost (USD)'] / enhanced_data['Energy Consumed (kWh)']
).replace([np.inf, -np.inf], np.nan)

# State-of-charge percentage points gained per hour of charging.
enhanced_data['charged_rates_per_hour'] = (
    enhanced_data['State of Charge (End %)'] - enhanced_data['State of Charge (Start %)']
) / enhanced_data['Charging Duration (hours)']

mean = enhanced_data['cost_per_kwh'].mean()
std = enhanced_data['cost_per_kwh'].std()

# Keep rows within three standard deviations of the mean unit price.
# Rows with a missing unit price fail both comparisons and are dropped.
within_3_sigma = (
    (enhanced_data['cost_per_kwh'] >= mean - 3 * std)
    & (enhanced_data['cost_per_kwh'] <= mean + 3 * std)
)
enhanced_data = enhanced_data[within_3_sigma]
print(f"original data count: {len(data)}, after filter: {len(enhanced_data)}")
print(enhanced_data['charged_rates_per_hour'][:10])

In [None]:
# Per city, the list of cost_per_kwh values that survived the outlier filter.
locaiton_charging_unit_price = (
    enhanced_data.groupby('Charging Station Location')['cost_per_kwh']
    .apply(list)
)
locaiton_charging_unit_price

In [None]:
# Plot the per-city unit-price lists. The axis unit is USD per kWh (energy);
# it was previously labelled USD/kW (power).
Graphs.grouped_bar_graph_var(
    locaiton_charging_unit_price,
    list(locaiton_charging_unit_price.keys()),
    'City',
    'Cost (USD/kWh)',
    1,
    'Cost of kwh by City',
)

In [None]:
# Scatter of unit price against ambient temperature (converted to Fahrenheit).
temperature_filtered_F = enhanced_data['Temperature (°C)'] * 9 / 5 + 32

fig, ax = plt.subplots()
ax.scatter(temperature_filtered_F, enhanced_data['cost_per_kwh'])

# Add the title and axis labels
ax.set_title('charging-temperature and price scatter')
ax.set_xlabel('temperature (F)')
ax.set_ylabel('price (USD/Kwh)')

# Render the figure
plt.show()

In [None]:
# Split the rows by the sign of the state-of-charge change per hour and
# report the mean rate and row count of each part.
# (A leading bare np.mean(...) expression was removed: its value was discarded.)
session_rates = enhanced_data['charged_rates_per_hour']

# Negative rate means the end state of charge is below the start value.
negative_charger = enhanced_data[session_rates < 0]
positive_charger = enhanced_data[session_rates >= 0]

print(f"Original average of charging rates per hour: {np.mean(session_rates)}")
print(f"Average of negative chargers: {np.mean(negative_charger['charged_rates_per_hour'])} Count: {len(negative_charger)}")
print(f"Average of positive chargers: {np.mean(positive_charger['charged_rates_per_hour'])} Count: {len(positive_charger)}")

In [None]:
# Where do the negative-rate rows occur? Row count per city.
(
    negative_charger['Charging Station Location']
    .value_counts()
)

In [None]:
# Drop non-negative-rate rows that lie more than three standard deviations
# from the mean charging speed.
positive_rates = positive_charger['charged_rates_per_hour']
mean = positive_rates.mean()
std = positive_rates.std()

# between() is inclusive on both ends, matching a >= lower & <= upper test.
lower_bound = mean - 3 * std
upper_bound = mean + 3 * std
filtered_positive_data = positive_charger[positive_rates.between(lower_bound, upper_bound)]

In [None]:
# Per city, the list of charging speeds after the 3-sigma filter.
location_charging_unit_speed = (
    filtered_positive_data.groupby('Charging Station Location')['charged_rates_per_hour']
    .apply(list)
)
# charged_rates_per_hour is state-of-charge % divided by hours, so the axis
# unit is %/hour (it was previously labelled %/kW).
Graphs.grouped_bar_graph_var(
    location_charging_unit_speed,
    list(location_charging_unit_speed.keys()),
    'City',
    'Charge Speed (%/hour)',
    4,
    'Charging Speed',
)

In [None]:
# Same per-city charging-speed plot, restricted to Tesla Model 3 rows.
Tesla_data = filtered_positive_data[filtered_positive_data['Vehicle Model'] == "Tesla Model 3"]
location_charging_unit_speed = (
    Tesla_data.groupby('Charging Station Location')['charged_rates_per_hour']
    .apply(list)
)
# Axis unit corrected from %/kW to %/hour; the title now names the vehicle
# subset so this figure is distinguishable from the all-vehicle one.
Graphs.grouped_bar_graph_var(
    location_charging_unit_speed,
    list(location_charging_unit_speed.keys()),
    'City',
    'Charge Speed (%/hour)',
    4,
    'Charging Speed (Tesla Model 3)',
)