In [1]:
# import the pandas library
import pandas as pd
# import the warnings module so we can suppress unnecessary warnings when running code
import warnings
warnings.filterwarnings('ignore')   # The object 'warnings' is used to call the method 'filterwarnings' and ignore the warnings
# import the library used to make bar charts etc.
import matplotlib.pyplot as plt



In [2]:
# Load the Automated Traffic Volume Counts CSV into a DataFrame.
# NOTE: the file is large (the row count shown further down is 27,414,481), so this read is slow.
df_ny_air = pd.read_csv('./dataset/Automated_Traffic_Volume_Counts.csv')

# Helper functions used to clean the data.
# rename_columns: rename columns in a given DataFrame.
#   df: the target pandas DataFrame
#   current_names (list): current column names to be renamed
#   new_names (list): new column names, in the same order as current_names
def rename_columns(df, current_names, new_names):
    """Return a copy of ``df`` with selected columns renamed.

    Args:
        df: pandas DataFrame whose columns should be renamed.
        current_names: list of existing column names to replace.
        new_names: list of replacement names, paired by position with
            ``current_names``.

    Returns:
        A new DataFrame with the renamed columns; ``df`` itself is not
        modified.

    Raises:
        ValueError: if ``current_names`` and ``new_names`` differ in length.
    """
    if len(current_names) == len(new_names):
        # Pair each old name with the new name at the same position.
        old_to_new = {old: new for old, new in zip(current_names, new_names)}
        return df.rename(columns=old_to_new)

    raise ValueError("current_names and new_names should have the same length")

# rename_row_data: replace values in one column of a given DataFrame, in place.
#   df: the target pandas DataFrame; col_name: the column you want to target;
#   changes (dict): maps each old value to its new value
def rename_row_data(df, col_name, changes):
    """Replace values in one column of ``df`` in place.

    Every replacement is matched against the column's original values, so
    mappings whose new values are also old values (e.g. a swap
    ``{'a': 'b', 'b': 'a'}``) behave as written instead of cascading.

    Args:
        df: pandas DataFrame to modify (mutated in place).
        col_name: name of the column whose values are rewritten.
        changes: dict mapping each old value to its replacement.

    Returns:
        None. The result is visible through ``df``.
    """
    column = df[col_name]
    # Build all row masks BEFORE writing anything: if a mask were computed
    # after an earlier replacement, a freshly written value could be matched
    # and replaced again by a later entry in ``changes``.
    pending = [(column == old_val, new_val) for old_val, new_val in changes.items()]
    for mask, new_val in pending:
        df.loc[mask, col_name] = new_val



In [3]:
# Number of rows in the full raw dataset (all years, before any filtering)
len(df_ny_air)

27414481

In [4]:
# Preview the first 5 records to check the columns and value formats
df_ny_air.head()

Unnamed: 0,RequestID,Boro,Yr,M,D,HH,MM,Vol,SegmentID,WktGeom,street,fromSt,toSt,Direction
0,1100,Staten Island,2011,10,14,10,45,102,103882,POINT (963246 173171.4),BAY ST,RICHMOND TERR,STUYVESANT PL,SB
1,1100,Staten Island,2011,10,14,11,0,110,103882,POINT (963246 173171.4),BAY ST,RICHMOND TERR,STUYVESANT PL,SB
2,1100,Staten Island,2011,10,14,11,15,101,103882,POINT (963246 173171.4),BAY ST,RICHMOND TERR,STUYVESANT PL,SB
3,1100,Staten Island,2011,10,14,11,30,112,103882,POINT (963246 173171.4),BAY ST,RICHMOND TERR,STUYVESANT PL,SB
4,1100,Staten Island,2011,10,14,11,45,94,103882,POINT (963246 173171.4),BAY ST,RICHMOND TERR,STUYVESANT PL,SB


In [5]:
# Clean up columns: drop the ones that are not of use to this analysis.
# errors='ignore' keeps this cell re-runnable: the result is assigned back to
# df_ny_air, so without it a second run would raise KeyError because the
# columns are already gone. (Trade-off: a misspelled column name is silently skipped.)
df_ny_air = df_ny_air.drop(columns=['RequestID', 'fromSt', 'toSt', 'Direction'], errors='ignore')

In [6]:
# Give the abbreviated columns readable names: 'Boro' -> 'Borough', 'Yr' -> 'Year'.
df_ny_air = rename_columns(df_ny_air, 
                            ['Boro', 'Yr'], 
                            ['Borough', 'Year'])
# Print all the column headings that remain after the drop and rename.
print(df_ny_air.columns)

Index(['Borough', 'Year', 'M', 'D', 'HH', 'MM', 'Vol', 'SegmentID', 'WktGeom',
       'street'],
      dtype='object')


In [7]:
# List the distinct values in the Borough column
unique_values_boro = df_ny_air['Borough'].unique()
print(unique_values_boro)

['Staten Island' 'Manhattan' 'Brooklyn' 'Queens' 'Bronx']


In [8]:
# Preview the frame again after dropping and renaming columns
df_ny_air.head()

Unnamed: 0,Borough,Year,M,D,HH,MM,Vol,SegmentID,WktGeom,street
0,Staten Island,2011,10,14,10,45,102,103882,POINT (963246 173171.4),BAY ST
1,Staten Island,2011,10,14,11,0,110,103882,POINT (963246 173171.4),BAY ST
2,Staten Island,2011,10,14,11,15,101,103882,POINT (963246 173171.4),BAY ST
3,Staten Island,2011,10,14,11,30,112,103882,POINT (963246 173171.4),BAY ST
4,Staten Island,2011,10,14,11,45,94,103882,POINT (963246 173171.4),BAY ST


In [9]:
# Find all the unique values in the Year column
# (unique() returns them in order of first appearance, not sorted)
unique_values_year = df_ny_air['Year'].unique()
print(unique_values_year)

[2011 2010 2009 2000 2008 2012 2007 2006 2013 2016 2014 2015 2017 2018
 2019 2020 2021 2022]


In [10]:
# Keep only the rows whose Year is one of the three most recent years in the data.
years_to_keep = [2022, 2021, 2020]
df_ny_air_filtered = df_ny_air.loc[df_ny_air['Year'].isin(years_to_keep)]

In [11]:
# Preview the first rows of the year-filtered frame (the original row index is kept)
df_ny_air_filtered.head()

Unnamed: 0,Borough,Year,M,D,HH,MM,Vol,SegmentID,WktGeom,street
26261415,Staten Island,2020,1,25,0,0,49,7457,POINT (948912.7573158939 149574.99331751716),ROCKLAND AVENUE
26261416,Staten Island,2020,1,25,0,15,48,7457,POINT (948912.7573158939 149574.99331751716),ROCKLAND AVENUE
26261417,Staten Island,2020,1,25,0,30,52,7457,POINT (948912.7573158939 149574.99331751716),ROCKLAND AVENUE
26261418,Staten Island,2020,1,25,0,45,27,7457,POINT (948912.7573158939 149574.99331751716),ROCKLAND AVENUE
26261419,Staten Island,2020,1,25,1,0,22,7457,POINT (948912.7573158939 149574.99331751716),ROCKLAND AVENUE


In [12]:
# Save the filtered subset to its own CSV; index=False leaves the row index out of the file.
df_ny_air_filtered.to_csv('./dataset/Automated_Traffic_Volume_Counts_2022-2020.csv', index=False)

In [33]:
# (rows, columns) of the filtered frame.
# NOTE(review): this cell's execution count (33) is out of sequence with the cells above;
# re-run the notebook top to bottom to confirm the shape still matches.
df_ny_air_filtered.shape

(498048, 10)