In [None]:
import pandas as pd
import numpy as np
from tabulate import tabulate
import re
from sklearn.impute import SimpleImputer
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
df1 = pd.read_csv('/content/df1.csv')

df1 = df1.dropna()

columns_to_clean = ['Postal Code', 'Model Year', 'Electric Range', 'Base MSRP', 'Legislative District', 'DOL Vehicle ID']

for col in columns_to_clean:
    df1[col] = df1[col].apply(lambda x: re.sub(r'[^0-9]+', '', str(x)))

car_counts = df1.groupby(['County', 'City']).size().reset_index(name='Car Count')

In [None]:
# Group by County and City and count the number of cars
car_counts = df1.groupby(['County', 'City']).size().reset_index(name='Car Count')

county_counts = car_counts.groupby('County')['Car Count'].sum().reset_index()

county_counts = county_counts.sort_values(by='Car Count', ascending=False).reset_index()

county_counts = county_counts.drop(['index'], axis=1)

county_counts

Unnamed: 0,County,Car Count
0,King,99076
1,Snohomish,22643
2,Pierce,14932
3,Clark,11328
4,Thurston,6969
5,Kitsap,6303
6,Spokane,5015
7,Whatcom,4602
8,Benton,2351
9,Skagit,2076


In [None]:
# Example: Select rows by index
countycounts_selected_rows = county_counts.loc[[0, 1, 2, 3, 4, 34, 35, 36, 37, 38]]
countycounts_selected_rows = countycounts_selected_rows.to_latex(index=False, escape=False, column_format='||c c||', caption='Electric Vehicle Population Dataset.', label='table:1')
print(countycounts_selected_rows)

\begin{table}
\caption{Electric Vehicle Population Dataset.}
\label{table:1}
\begin{tabular}{||c c||}
\toprule
County & Car Count \\
\midrule
King & 99076 \\
Snohomish & 22643 \\
Pierce & 14932 \\
Clark & 11328 \\
Thurston & 6969 \\
Adams & 55 \\
Lincoln & 51 \\
Ferry & 35 \\
Columbia & 13 \\
Garfield & 2 \\
\bottomrule
\end{tabular}
\end{table}



In [None]:
df_washington = pd.read_csv('/content/washpop.csv')

df_washington = df_washington.drop(columns=['name', 'pop2020', 'year', 'fips', 'state', 'stateCode', 'growthSince2020', 'slug', 'rank', 'densityMi', 'areaMi'])

df_washington['county'] = df_washington['county'].str.replace(' County', '', regex=False)

df_washington

Unnamed: 0,pop2024,county,landArea,density
0,2277449,King,2116,1076.299149
1,930913,Pierce,1668,558.101319
2,849070,Snohomish,2087,406.837566
3,553170,Spokane,1764,313.588435
4,525563,Clark,628,836.883758
5,299367,Thurston,723,414.062241
6,277503,Kitsap,395,702.539241
7,256267,Yakima,4295,59.666356
8,233240,Whatcom,2108,110.645161
9,217609,Benton,1700,128.005294


In [None]:
# Example: Select rows by index
df_washington_selected_rows = df_washington.loc[[0, 1, 2, 3, 4, 34, 35, 36, 37, 38]]
df_washington_selected_rows = df_washington_selected_rows.to_latex(index=False, escape=False, column_format='||c c||', caption='County Population and Density Dataset')
print(df_washington_selected_rows)

\begin{table}
\caption{County Population and Density Dataset}
\begin{tabular}{||c c||}
\toprule
pop2024 & county & landArea & density \\
\midrule
2277449 & King & 2116 & 1076.299149 \\
930913 & Pierce & 1668 & 558.101319 \\
849070 & Snohomish & 2087 & 406.837566 \\
553170 & Spokane & 1764 & 313.588435 \\
525563 & Clark & 628 & 836.883758 \\
11863 & Lincoln & 2311 & 5.133276 \\
7526 & Ferry & 2203 & 3.416251 \\
4842 & Wahkiakum & 263 & 18.410646 \\
4080 & Columbia & 869 & 4.695052 \\
2369 & Garfield & 711 & 3.331927 \\
\bottomrule
\end{tabular}
\end{table}



In [None]:
df_countyGDP = pd.read_csv('/content/washGDP.csv')

df_countyGDP = df_countyGDP.drop(index=0).reset_index(drop=True)

df_countyGDP['County'] = df_countyGDP['County'].str.replace('_', ' ', regex=False)

df_countyGDP.rename(columns={'2020': '2020 GDP'}, inplace=True)

df_countyGDP = df_countyGDP.sort_values(by='Rank', ascending=True).reset_index()

df_countyGDP = df_countyGDP.drop(columns=['Rank', '2022', '2019', '2021', '2020 change', '2021 Change', '2022 Change', 'Rank in State', 'index'])

df_countyGDP

Unnamed: 0,County,2020 GDP
0,King,329239691
1,Snohomish,45864728
2,Pierce,44792744
3,Spokane,26628547
4,Clark,22164411
5,Thurston,13747480
6,Whatcom,14158405
7,Kitsap,12734085
8,Benton,11478886
9,Yakima,10523373


In [None]:
df_chargingstations = pd.read_csv('/content/chargingstations.csv', delimiter=';', encoding='ISO-8859-1')

df_chargingstations = df_chargingstations.drop(columns=['Station Name', 'Street Address', 'ZIP', 'EV Network', 'EV Connector Types', 'Access Code', 'Access Detail Code', 'Facility Type'])

df_chargingstations = df_chargingstations[df_chargingstations['State'] == 'WA']

df_chargingstations['Total Chargers'] = df_chargingstations['EV Level1 EVSE Num'].fillna(0) + df_chargingstations['EV Level2 EVSE Num'].fillna(0) + df_chargingstations['EV DC Fast Count'].fillna(0)

df_chargingstations = df_chargingstations.drop(columns=['EV Level1 EVSE Num', 'EV Level2 EVSE Num', 'EV DC Fast Count'])

df_chargingstations = df_chargingstations.groupby('City', as_index=False).agg({'Total Chargers': 'sum'})

citiesInWA = car_counts

citiesInWA.drop(columns=['Car Count'])

county_chargers = pd.merge(citiesInWA, df_chargingstations, on='City', how='left')

county_chargers = county_chargers.groupby('County', as_index=False).agg({'Total Chargers': 'sum'})

county_chargers = county_chargers.sort_values(by='Total Chargers', ascending=False).reset_index()

county_chargers = county_chargers.drop(['index'], axis=1)

county_chargers

Unnamed: 0,County,Total Chargers
0,King,2925.0
1,Snohomish,455.0
2,Pierce,455.0
3,Spokane,287.0
4,Kitsap,270.0
5,Clark,249.0
6,Thurston,232.0
7,Skagit,207.0
8,Benton,141.0
9,Whatcom,124.0


In [None]:
# Renaming the 'county' column in df_washington to match 'County'
df_washington = df_washington.rename(columns={'county': 'County'})

# First, merge county_counts with df_countyGDP
merged_df = pd.merge(df_countyGDP, county_counts, on='County', how='left')

# Then merge the result with df_washington
merged_df = pd.merge(merged_df, df_washington, on='County', how='left')

# Then merge the result with df_washington
merged_df = pd.merge(merged_df, county_chargers, on='County', how='left')

# Fill missing values in 'Car_Count' with 0
merged_df['Car Count'] = merged_df['Car Count'].fillna(0)

# Create the final ML_TABLE with the desired columns
ML_TABLE = merged_df[['County', 'Car Count', 'Total Chargers', '2020 GDP', 'pop2020', 'landArea', 'density']]

ML_TABLE = ML_TABLE.rename(columns={'2020 GDP': 'GDP'})

ML_TABLE = ML_TABLE.rename(columns={'Car Count': 'Number of Vehicles'})

ML_TABLE = ML_TABLE.rename(columns={'pop2020': 'Population'})

ML_TABLE = ML_TABLE.rename(columns={'landArea': 'Land Area'})

ML_TABLE = ML_TABLE.rename(columns={'density': 'Population Density'})

ML_TABLE = ML_TABLE.sort_values('Number of Vehicles', ascending=False)

# Display the ML_TABLE
ML_TABLE.fillna(0, inplace=True)  # Fill NaN with 0

# Add a new column 'State' and set its value to 'Washington' for all rows
ML_TABLE['State'] = 'Washington'

# Display the updated ML_TABLE
ML_TABLE

Unnamed: 0,County,Number of Vehicles,Total Chargers,GDP,Population,Land Area,Population Density,State
0,King,99076,2925.0,329239691,2274282,2116,1076.299149,Washington
1,Snohomish,22643,455.0,45864728,829933,2087,406.837566,Washington
2,Pierce,14932,455.0,44792744,923589,1668,558.101319,Washington
4,Clark,11328,249.0,22164411,505338,628,836.883758,Washington
5,Thurston,6969,232.0,13747480,295998,723,414.062241,Washington
7,Kitsap,6303,270.0,12734085,275823,395,702.539241,Washington
3,Spokane,5015,287.0,26628547,541175,1764,313.588435,Washington
6,Whatcom,4602,124.0,14158405,227449,2108,110.645161,Washington
8,Benton,2351,141.0,11478886,207419,1700,128.005294,Washington
10,Skagit,2076,207.0,6679499,129902,1730,76.018497,Washington


In [None]:
df_california = pd.read_csv('/content/New_ZEV_Sales_Last_updated_11-19-2024_ada.csv')

df_california[['Data_Year', 'Quarter', 'County', 'FUEL_TYPE', 'MAKE', 'MODEL', 'Number of Vehicles']] = df_california['Data_Year;Quarter;County;FUEL_TYPE;MAKE;MODEL;Number of Vehicles'].str.split(';', expand=True)

df_california = df_california.drop(columns=['Data_Year;Quarter;County;FUEL_TYPE;MAKE;MODEL;Number of Vehicles'])

df_california = df_california.drop(columns=['Data_Year', 'Quarter', 'MAKE', 'MODEL'])

df_california['Number of Vehicles'] = pd.to_numeric(df_california['Number of Vehicles'], errors='coerce')

CA_vehicle_totals = df_california.groupby('County')['Number of Vehicles'].sum()

CA_vehicle_totals = CA_vehicle_totals[CA_vehicle_totals.index != "Out Of State"]

CA_vehicle_totals_sorted = CA_vehicle_totals.sort_values(ascending=False)

CA_vehicle_totals_sorted

Unnamed: 0_level_0,Number of Vehicles
County,Unnamed: 1_level_1
Los Angeles,581479
Orange,263287
Santa Clara,223206
San Diego,175122
Alameda,134796
Riverside,84558
San Mateo,81051
Contra Costa,76935
San Bernardino,66257
San Francisco,52093


In [None]:
california_charger = pd.read_csv('/content/EV_Chargers_Last_updated_08-26-2024_ada.csv')

california_charger[['County', 'Public Level 1', 'Shared Private Level 1', 'Public Level 2', 'Shared Private Level 2', 'Public DC Fast', 'Shared Private DC Fast', 'Total']] = california_charger['County;Public Level 1;Shared Private Level 1;Public Level 2;Shared Private Level 2;Public DC Fast;Shared Private DC Fast;Total'].str.split(';', expand=True)

california_charger = california_charger.drop(columns=['County;Public Level 1;Shared Private Level 1;Public Level 2;Shared Private Level 2;Public DC Fast;Shared Private DC Fast;Total'])

california_charger = california_charger.drop(columns=['Public Level 1', 'Shared Private Level 1', 'Public Level 2', 'Shared Private Level 2', 'Public DC Fast', 'Shared Private DC Fast'])

california_charger['Total'] = pd.to_numeric(california_charger['Total'], errors='coerce')

# Sort the DataFrame by the 'Total' column in descending order
california_charger = california_charger.sort_values('Total', ascending=False)

california_charger = california_charger.drop(index=58)

california_charger

Unnamed: 0,County,Total
18,Los Angeles,60698
42,Santa Clara,23365
36,San Diego,10982
29,Orange,8522
40,San Mateo,6994
0,Alameda,6478
33,Sacramento,3984
32,Riverside,3206
35,San Bernardino,2911
37,San Francisco,2853


In [None]:
# Example: Select rows by index
california_charger_selected_rows = california_charger.loc[[0, 1, 2, 3, 4, 53, 54, 55, 56, 57]]
california_charger_selected_rows = california_charger_selected_rows.to_latex(index=False, escape=False, column_format='||c c||', caption='2023 Texas County GDP')
print(california_charger_selected_rows)

\begin{table}
\caption{2023 Texas County GDP}
\begin{tabular}{||c c||}
\toprule
County & Total \\
\midrule
Alameda & 6478 \\
Alpine & 12 \\
Amador & 72 \\
Butte & 259 \\
Calaveras & 41 \\
Tulare & 488 \\
Tuolumne & 100 \\
Ventura & 1531 \\
Yolo & 810 \\
Yuba & 112 \\
\bottomrule
\end{tabular}
\end{table}



In [None]:
california_population = pd.read_csv('/content/californiapop.csv')

california_population = california_population.drop(columns=['name', 'pop2020', 'year', 'fips', 'state', 'stateCode', 'growthSince2020', 'slug', 'rank', 'densityMi', 'areaMi'])

california_population['county'] = california_population['county'].str.replace(' County', '', regex=False)

california_population


Unnamed: 0,pop2024,county,landArea,density
0,9606925,Los Angeles,4060,2366.237685
1,3262770,San Diego,4210,775.004751
2,3121138,Orange,793,3935.861286
3,2510643,Riverside,7209,348.265085
4,2196314,San Bernardino,20068,109.443592
5,1876849,Santa Clara,1291,1453.794733
6,1616117,Alameda,737,2192.83175
7,1584196,Sacramento,965,1641.653886
8,1153031,Contra Costa,717,1608.132497
9,1018965,Fresno,5958,171.024673


In [None]:
california_gdp = pd.read_csv('/content/gdpcalifornia.csv')

california_gdp[['California', '3.248.656.553']] = california_gdp['California;3.248.656.553'].str.split(';', expand=True)

california_gdp = california_gdp.drop(columns=['California;3.248.656.553'])

california_gdp = california_gdp.rename(columns={"California": "County", "3.248.656.553": "GDP"})

california_gdp['GDP'] = california_gdp['GDP'].str.replace('.', '', regex=False)

california_gdp


Unnamed: 0,County,GDP
0,Alameda,150532046
1,Alpine,105610
2,Amador,1659330
3,Butte,9278380
4,Calaveras,1472506
5,Colusa,1609218
6,Contra Costa,78357593
7,Del Norte,844062
8,El Dorado,8833331
9,Fresno,47612742


In [None]:
california_population
california_charger
california_gdp
CA_vehicle_totals_sorted

# Renaming the 'county' column in df_washington to match 'County'
california_population = california_population.rename(columns={'county': 'County'})

# First, merge county_counts with df_countyGDP
merged_df = pd.merge(california_gdp, CA_vehicle_totals_sorted, on='County', how='left')

# Then merge the result with df_washington
merged_df = pd.merge(merged_df, california_population, on='County', how='left')

# Then merge the result with df_washington
merged_df = pd.merge(merged_df, california_charger, on='County', how='left')

# Create the final ML_TABLE with the desired columns
ML_TABLE2 = merged_df[['County', 'Number of Vehicles', 'Total', 'GDP', 'pop2024', 'landArea', 'density']]

ML_TABLE2 = ML_TABLE2.sort_values('Number of Vehicles', ascending=False)

ML_TABLE2 = ML_TABLE2.rename(columns={'Total': 'Total Chargers'})

ML_TABLE2 = ML_TABLE2.rename(columns={'pop2024': 'Population'})

ML_TABLE2 = ML_TABLE2.rename(columns={'landArea': 'Land Area'})

ML_TABLE2 = ML_TABLE2.rename(columns={'density': 'Population Density'})

ML_TABLE2

Unnamed: 0,County,Number of Vehicles,Total Chargers,GDP,Population,Land Area,Population Density
18,Los Angeles,581479,60698,801967353,9606925,4060,2366.237685
29,Orange,263287,8522,273063592,3121138,793,3935.861286
42,Santa Clara,223206,23365,390234709,1876849,1291,1453.794733
36,San Diego,175122,10982,261671687,3262770,4210,775.004751
0,Alameda,134796,6478,150532046,1616117,737,2192.83175
32,Riverside,84558,3206,98880553,2510643,7209,348.265085
40,San Mateo,81051,6994,186334708,723777,449,1611.975501
6,Contra Costa,76935,2384,78357593,1153031,717,1608.132497
35,San Bernardino,66257,2911,102307449,2196314,20068,109.443592
37,San Francisco,52093,2853,237017759,810202,47,17238.340426


In [None]:
ML_TABLE = pd.concat([ML_TABLE, ML_TABLE2], ignore_index=True)

ML_TABLE = ML_TABLE.sort_values('Number of Vehicles', ascending=False)

ML_TABLE


Unnamed: 0,County,Number of Vehicles,Total Chargers,GDP,Population,Land Area,Population Density
38,Los Angeles,581479.0,60698.0,801967353,9606925,4060,2366.237685
39,Orange,263287.0,8522.0,273063592,3121138,793,3935.861286
40,Santa Clara,223206.0,23365.0,390234709,1876849,1291,1453.794733
41,San Diego,175122.0,10982.0,261671687,3262770,4210,775.004751
42,Alameda,134796.0,6478.0,150532046,1616117,737,2192.831750
...,...,...,...,...,...,...,...
21,Skamania,0.0,0.0,270333,12041,1658,7.735223
20,Ferry,0.0,0.0,190828,7198,2203,3.416251
19,Garfield,0.0,0.0,173024,2294,711,3.331927
18,San Juan,0.0,0.0,775348,17826,174,106.091954


In [None]:
florida_population = pd.read_csv('/content/Floridapop.csv')

florida_population = florida_population.drop(columns=['name', 'pop2020', 'year', 'fips', 'state', 'stateCode', 'growthSince2020', 'slug', 'rank', 'densityMi', 'areaMi'])

florida_population['county'] = florida_population['county'].str.replace(' County', '', regex=False)

florida_population

Unnamed: 0,pop2024,county,landArea,density
0,2700678,Miami-Dade,1900,1421.409474
1,1978173,Broward,1203,1644.366584
2,1557655,Hillsborough,1023,1522.634409
3,1547735,Palm Beach,1964,788.052444
4,1491071,Orange,903,1651.241417
...,...,...,...,...
62,13475,Calhoun,567,23.765432
63,13109,Glades,807,16.244114
64,12696,Franklin,545,23.295413
65,8379,Lafayette,543,15.430939


In [None]:
florida_gdp = pd.read_csv('/content/gdpflorida.csv')

florida_gdp[['Florida', '2023']] = florida_gdp['Florida;2023'].str.split(';', expand=True)

florida_gdp = florida_gdp.drop(columns=['Florida;2023'])

florida_gdp = florida_gdp.rename(columns={"Florida": "County", "2023": "GDP"})

florida_gdp['GDP'] = florida_gdp['GDP'].str.replace('.', '', regex=False)

florida_gdp

Unnamed: 0,County,GDP
0,Alachua,16176855
1,Baker,774660
2,Bay,10140163
3,Bradford,688898
4,Brevard,32780628
...,...,...
62,Union,394035
63,Volusia,21986706
64,Wakulla,872371
65,Walton,4720447


In [None]:
florida_fips = pd.read_csv('/content/Floridapop.csv')

florida_fips = florida_fips.drop(columns=['name', 'pop2020', 'year', 'pop2024', 'state', 'stateCode', 'growthSince2020', 'slug', 'rank', 'densityMi', 'areaMi', 'landArea', 'density'])

florida_fips

Unnamed: 0,fips,county
0,12086,Miami-Dade County
1,12011,Broward County
2,12057,Hillsborough County
3,12099,Palm Beach County
4,12095,Orange County
...,...,...
62,12013,Calhoun County
63,12043,Glades County
64,12037,Franklin County
65,12067,Lafayette County


In [None]:
florida_chargers = pd.read_csv('/content/Alternative_Fueling_Stations_-6669030252532885733.csv')

florida_chargers = florida_chargers.drop(columns=[
    'OBJECTID', 'access_code', 'access_days_time', 'access_detail_code', 'cards_accepted', 'date_last_confirmed', 'expected_date', 'fuel_type_code', 'groups_with_access_code', 'id', 'bd_blends_fr',
    'groups_with_access_code_fr', 'ev_pricing_fr', 'nps_unit_name', 'access_days_time_fr', 'intersection_directions_fr', 'federal_agency_id', 'federal_agency_code', 'federal_agency_name', 'ev_network_ids_station',
    'ev_network_ids_posts', 'x', 'y', 'station_phone', 'updated_at', 'lng_vehicle_class', 'lpg_nozzle_types', 'lpg_primary', 'ng_fill_type_code', 'ng_psi', 'ng_vehicle_class', 'rd_blended_with_biodiesel',
    'rd_blends', 'rd_blends_fr', 'rd_max_biodiesel_level', 'maximum_vehicle_class', 'open_date', 'owner_type_code', 'restricted_access', 'status_code', 'funding_sources', 'facility_type', 'geocode_status',
    'latitude', 'ev_other_evse', 'ev_pricing', 'ev_renewable_source', 'ev_workplace_charging', 'hy_is_retail',
    'hy_pressures', 'hy_standards', 'hy_status_link', 'longitude', 'country', 'lpg_nozzle_types', 'intersection_directions', 'plus4', 'street_address', 'zip', 'bd_blends', 'cng_dispenser_num', 'cng_fill_type_code',
    'cng_has_rng', 'cng_psi', 'cng_renewable_source', 'cng_total_compression', 'cng_total_storage', 'e85_blender_pump', 'e85_other_ethanol_blends', 'ev_connector_types', 'ev_dc_fast_num',
    'ev_level1_evse_num', 'ev_level2_evse_num', 'ev_network', 'ev_network_web', 'lng_has_rng', 'lng_renewable_source', 'cng_vehicle_class', 'ev_connector_types'])

florida_chargers = florida_chargers[florida_chargers['state'] == "FL"]

florida_cities = pd.read_csv('/content/Copy of Spreadsheet List of All cities in Florida export 2024-12-10 02-57-35.csv')

florida_cities = florida_cities.drop(columns=['state_id', 'state_name', 'lat', 'lng', 'population', 'type', 'name_ascii'])

florida_chargers = pd.merge(florida_chargers, florida_cities, left_on='city', right_on='name', how='left')

florida_chargers.dropna(inplace=True)

florida_chargers = florida_chargers.drop(columns=['name', 'state'])

florida_chargers = pd.merge(florida_chargers, florida_fips, left_on='county_fips', right_on='fips', how='left')

florida_chargers = florida_chargers.drop(columns=['county_fips', 'city'])

# Group the dataframe by 'county'
florida_chargers = florida_chargers.groupby('county')

# Calculate the total number of rows for each county
florida_chargers = florida_chargers.size().reset_index(name='total_rows')

# Drop the 'station_name' column
florida_chargers = florida_chargers.drop('station_name', axis=1, errors='ignore')

# Drop duplicate rows
florida_chargers = florida_chargers.drop_duplicates()

florida_chargers['county'] = florida_chargers['county'].str.replace(' County', '', regex=False)

florida_chargers = florida_chargers.rename(columns={'county': 'County'})

florida_chargers

Unnamed: 0,County,total_rows
0,Alachua,2
1,Brevard,3
2,Broward,2
3,Duval,6
4,Escambia,2
5,Hillsborough,5
6,Lee,1
7,Martin,1
8,Miami-Dade,2
9,Okaloosa,2


In [None]:
df_florida = pd.read_csv('/content/fl_ev_registrations_public.csv')

df_florida = df_florida.drop(columns=['DMV ID', 'DMV Snapshot (Date)', 'Vehicle Name', 'Registration Valid Date', 'Registration Expiration Date', 'Technology'])

df_florida = df_florida.groupby('County')

df_florida = df_florida.size().reset_index(name='total_rows')

df_florida['County'] = df_florida['County'].str.replace('Dade', 'Miami-Dade', regex=False)

df_florida = df_florida[df_florida["County"] != "Unknown"]

df_florida

Unnamed: 0,County,total_rows
0,Alachua,335
1,Baker,3
2,Bay,141
3,Bradford,10
4,Brevard,821
...,...,...
58,Taylor,1
59,Volusia,495
60,Wakulla,12
61,Walton,96


In [None]:
florida_population
florida_chargers
florida_gdp
df_florida

# Renaming the 'county' column in df_washington to match 'County'
florida_population = florida_population.rename(columns={'county': 'County'})

# First, merge county_counts with df_countyGDP
merged_fl = pd.merge(florida_gdp, df_florida, on='County', how='left')

# Renaming the 'county' column in df_washington to match 'County'
merged_fl = merged_fl.rename(columns={'total_rows': 'Number of Vehicles'})

# Then merge the result with df_washington
merged_fl = pd.merge(merged_fl, florida_population, on='County', how='left')

# Then merge the result with df_washington
merged_fl = pd.merge(merged_fl, florida_chargers, on='County', how='left')

merged_fl = merged_fl.rename(columns={'total_rows': 'Total Chargers'})

# Create the final ML_TABLE with the desired columns
ML_TABLE2 = merged_fl[['County', 'GDP', 'Number of Vehicles', 'pop2024', 'landArea', 'density', 'Total Chargers']]

ML_TABLE2 = ML_TABLE2.sort_values('Number of Vehicles', ascending=False)

ML_TABLE2 = ML_TABLE2.rename(columns={'pop2024': 'Population'})

ML_TABLE2 = ML_TABLE2.rename(columns={'landArea': 'Land Area'})

ML_TABLE2 = ML_TABLE2.rename(columns={'density': 'Population Density'})

ML_TABLE2.fillna(0, inplace=True)

ML_TABLE2

Unnamed: 0,County,GDP,Number of Vehicles,Population,Land Area,Population Density,Total Chargers
42,Miami-Dade,192808376,4198.0,2700678,1900,1421.409474,2.0
5,Broward,132361553,3424.0,1978173,1203,1644.366584,2.0
27,Hillsborough,115772943,1914.0,1557655,1023,1522.634409,5.0
49,Palm Beach,106719346,1832.0,1547735,1964,788.052444,1.0
47,Orange,121343736,1649.0,1491071,903,1651.241417,9.0
...,...,...,...,...,...,...,...
12,DeSoto,1079903,0.0,36638,637,57.516484,0.0
22,Hamilton,588042,0.0,13748,514,26.747082,0.0
32,Lafayette,210912,0.0,8379,543,15.430939,0.0
48,Osceola,14415205,0.0,453025,1328,341.133283,0.0


In [None]:
ML_TABLE = pd.concat([ML_TABLE, ML_TABLE2], ignore_index=True)

ML_TABLE = ML_TABLE.sort_values('Number of Vehicles', ascending=False)

# Drop duplicate rows based on all columns
ML_TABLE.drop_duplicates(inplace=True)
# Reset the index of the DataFrame after removing duplicates
ML_TABLE.reset_index(drop=True, inplace=True)

ML_TABLE


NameError: name 'pd' is not defined

In [None]:
# Convert specified columns to integer type
columns_to_convert = ['Number of Vehicles', 'Total Chargers', 'GDP', 'Population', 'Land Area', 'Population Density']

# Use the astype method to convert to int
for column in columns_to_convert:
    ML_TABLE[column] = ML_TABLE[column].astype(int)

# Display the updated DataFrame to verify the changes
print(ML_TABLE[columns_to_convert].dtypes)
# Remove rows where "Number of Vehicles" is less than 1000
# ML_TABLE = ML_TABLE[ML_TABLE["Number of Vehicles"] >= 1000]
# Display the updated DataFrame (optional)
ML_TABLE


Number of Vehicles    int64
Total Chargers        int64
GDP                   int64
Population            int64
Land Area             int64
Population Density    int64
dtype: object


Unnamed: 0,County,Number of Vehicles,Total Chargers,GDP,Population,Land Area,Population Density
0,Los Angeles,581479,60698,801967353,9606925,4060,2366
1,Orange,263287,8522,273063592,3121138,793,3935
2,Santa Clara,223206,23365,390234709,1876849,1291,1453
3,San Diego,175122,10982,261671687,3262770,4210,775
4,Alameda,134796,6478,150532046,1616117,737,2192
...,...,...,...,...,...,...,...
158,Lincoln,0,0,489660,10928,2311,5
159,Skamania,0,0,270333,12041,1658,7
160,Ferry,0,0,190828,7198,2203,3
161,Garfield,0,0,173024,2294,711,3


In [None]:
# Save the DataFrame to a CSV file

ML_TABLE.to_csv('ML_TABLE.csv', index=False)


# Download the CSV file

from google.colab import files

files.download('ML_TABLE.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>