In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

data_path = '../../data/'  
df_car_detectors = gpd.read_file(data_path + 'all_car_detectors.geojson')

In [2]:
df_car_detectors

Unnamed: 0,iu_ac,date_debut,date_fin,libelle,iu_nd_aval,libelle_nd_aval,iu_nd_amont,libelle_nd_amont,geo_point_2d,geometry
0,1743,1996-10-03 02:00:00+00:00,2023-01-01 01:00:00+00:00,Bd_Macdonald,928,Bd_Macdonald - Jaques Duchesne,929,Bd_Macdonald - Lounes Matoub,"{'lon': 2.3753362643387073, 'lat': 48.89885056...","LINESTRING (2.37633 48.89887, 2.37435 48.89884)"
1,1746,1996-10-03 02:00:00+00:00,2023-01-01 01:00:00+00:00,Bd_Macdonald,3630,Bd Macdonald - Rue E. 019,929,Bd_Macdonald - Lounes Matoub,"{'lon': 2.3785952253516314, 'lat': 48.89874752...","LINESTRING (2.37634 48.89868, 2.38085 48.89881)"
2,914,1996-10-03 02:00:00+00:00,2023-01-01 01:00:00+00:00,Av_Pte_Vincennes,524,Pte_Vincennes-Acces_PI,514,Bd_Davout-Pte_de_Vincennes,"{'lon': 2.412529726334607, 'lat': 48.846824834...","LINESTRING (2.41126 48.84690, 2.41152 48.84693..."
3,1708,1996-10-03 02:00:00+00:00,2023-01-01 01:00:00+00:00,Bd_Ney,899,Ney-SNCF_Nord_1,112,Av_Pte_Poissonniers-Bd_Ney,"{'lon': 2.353299882519023, 'lat': 48.898320555...","LINESTRING (2.35254 48.89830, 2.35406 48.89834)"
4,862,1996-10-03 02:00:00+00:00,2023-01-01 01:00:00+00:00,Bd_Massena,497,Bd_Massena_Porte_de_France,495,Bd_Massena-Av-Pte_Vitry,"{'lon': 2.3783630199335652, 'lat': 48.82451111...","LINESTRING (2.37712 48.82405, 2.37960 48.82497)"
...,...,...,...,...,...,...,...,...,...,...
3734,1831,2005-01-01 01:00:00+00:00,2019-06-01 02:00:00+00:00,N_2,942,Pte_Villette-Acces_PI,971,Pte_Villette-Sortie_PE_RN2,"{'lon': 2.3869802636004467, 'lat': 48.90082988...","LINESTRING (2.38707 48.90095, 2.38689 48.90071)"
3735,6140,2005-01-01 01:00:00+00:00,2019-06-01 02:00:00+00:00,SI_Quai_Issy_bretelle_1,599,Quai_Issy_bretelles_1-2,595,SI_Quai_Issy,"{'lon': 2.2701070063099777, 'lat': 48.83548466...","LINESTRING (2.27149 48.83551, 2.27011 48.83552..."
3736,4372,2005-01-01 01:00:00+00:00,2019-06-01 02:00:00+00:00,Av_Marceau,2345,Av_Marceau-Av_Pdt_Wilson,2343,Place_Brisson,"{'lon': 2.2997389284685914, 'lat': 48.86578366...","LINESTRING (2.29958 48.86639, 2.29990 48.86518)"
3737,4931,2005-01-01 01:00:00+00:00,2019-06-01 02:00:00+00:00,Cours_de_Vincennes,289,Vincennes-Picpus-Charonne,2608,Cours_de_Vincennes-Face_35,"{'lon': 2.3999136948032023, 'lat': 48.84808813...","LINESTRING (2.40062 48.84803, 2.39920 48.84815)"


In [3]:
def find_detectors_by_year(df, year):
    """
    Finds detectors installed in a given year.

    Parameters:
    df (DataFrame): GeoDataFrame containing detectors information.
    year (int): The year to filter detectors for.

    Returns:
    DataFrame: DataFrame containing detectors installed in the specified year.
    """
    detectors_in_year = df[(df['date_debut'].dt.year <= year) & (df['date_fin'].dt.year >= year)]
    return detectors_in_year

def compute_changes_in_detectors(df, start_year, end_year):
    """
    Computes changes in detectors between consecutive years.

    Parameters:
    df (DataFrame): GeoDataFrame containing detectors information.
    start_year (int): The start year for analysis.
    end_year (int): The end year for analysis.

    Returns:
    DataFrame: DataFrame containing the count of detectors added and removed each year.
    """
    changes = []
    for year in range(start_year + 1, end_year + 1):
        detectors_prev_year = find_detectors_by_year(df, year - 1)
        detectors_current_year = find_detectors_by_year(df, year)        
        added_detectors = len(detectors_current_year) - len(detectors_prev_year)
        removed_detectors = len(detectors_prev_year) - len(detectors_current_year)
        changes.append({'Year': year, 'Detectors_Added': added_detectors, 'Detectors_Removed': removed_detectors})
    
    changes_df = pd.DataFrame(changes)
    return changes_df

def read_detector_data():
    ldd_2010_2012 = pd.read_csv(data_path + 'traffic_data/traffic_data_2010_2012.csv')
    ldd_2013_2020 = pd.read_csv(data_path + 'traffic_data/traffic_data.csv')
    ldd_2021_2022 = pd.read_csv(data_path + 'traffic_data/traffic_data_2021_2022.csv')
    ldd_2023 = pd.read_csv(data_path + 'traffic_data/traffic_data_2023.csv')
    
    # Convert each dataframe's t_1h column to datetime before concatenation
    for df in [ldd_2010_2012, ldd_2013_2020, ldd_2021_2022, ldd_2023]:
        # First convert to datetime with timezone info
        df['t_1h'] = pd.to_datetime(df['t_1h'], errors='coerce')
        # Then remove timezone info if present
        df['t_1h'] = df['t_1h'].dt.tz_localize(None)
        
    ldd = pd.concat([ldd_2010_2012, ldd_2013_2020, ldd_2021_2022, ldd_2023])
    # ldd['t_1h'] = pd.to_datetime(ldd['t_1h'])
    return ldd

def find_detectors_by_year_ldd(df, year):
    """
    Finds detectors installed in a given year.

    Parameters:
    df (DataFrame): GeoDataFrame containing detectors information.
    year (int): The year to filter detectors for.

    Returns:
    DataFrame: DataFrame containing detectors installed in the specified year.
    """
    entries_in_year = df[(df['t_1h'].dt.year <= year) & (df['t_1h'].dt.year >= year)]

    det_in_year = entries_in_year['iu_ac'].unique()
    return det_in_year

In [4]:
len(find_detectors_by_year(df_car_detectors, 2000)['iu_ac'].unique())

2876

In [5]:
changes = []
for year in range(1997, 2024):
    detectors_prev_year = find_detectors_by_year(df_car_detectors, year - 1)
    detectors_current_year = find_detectors_by_year(df_car_detectors, year)
    
    unique_det_prev = detectors_prev_year['iu_ac'].unique()
    unique_det_curr = detectors_current_year['iu_ac'].unique()
    
    det_in_both = np.intersect1d(unique_det_prev, unique_det_curr)
    det_in_just_prev = np.setdiff1d(unique_det_prev, det_in_both)
    det_in_just_curr = np.setdiff1d(unique_det_curr, det_in_both)
    
    added_detectors = len(det_in_just_curr) 
    removed_detectors = len(det_in_just_prev) 
    
    # print(added_detectors, removed_detectors)
    
    changes.append({'Year': year, 'Number_of_Detectors': len(unique_det_curr), 'Detectors_Added': added_detectors, 'Detectors_Removed': removed_detectors})
    
changes_df = pd.DataFrame(changes)

In [6]:
changes_df

Unnamed: 0,Year,Number_of_Detectors,Detectors_Added,Detectors_Removed
0,1997,2758,259,0
1,1998,2805,47,0
2,1999,2820,15,0
3,2000,2876,56,0
4,2001,2933,57,0
5,2002,2968,35,0
6,2003,2991,23,0
7,2004,2994,3,0
8,2005,3060,66,0
9,2006,3092,32,0


In [7]:
ldd = read_detector_data()

In [8]:
def find_detectors_by_year_ldd_delivering_at_least_once_q_or_k(df, year):
    """
    Finds detectors installed in a given year.

    Parameters:
    df (DataFrame): GeoDataFrame containing detectors information.
    year (int): The year to filter detectors for.

    Returns:
    DataFrame: DataFrame containing detectors installed in the specified year.
    """
    entries_in_year = df[(df['t_1h'].dt.year <= year) & (df['t_1h'].dt.year >= year)]
    
    # filter entries_in_year for entries, where q and k are not NaN
    entries_in_year = entries_in_year[(~entries_in_year['q'].isnull()) | (~entries_in_year['k'].isnull())]
    print(entries_in_year.head())
    
    det_in_year = entries_in_year['iu_ac'].unique()
    return det_in_year

def find_detectors_by_year_ldd_delivering_at_least_once_q_and_k(df, year):
    """
    Finds detectors installed in a given year.

    Parameters:
    df (DataFrame): GeoDataFrame containing detectors information.
    year (int): The year to filter detectors for.

    Returns:
    DataFrame: DataFrame containing detectors installed in the specified year.
    """
    entries_in_year = df[(df['t_1h'].dt.year <= year) & (df['t_1h'].dt.year >= year)]
    
    # filter entries_in_year for entries, where q and k are not NaN
    entries_in_year = entries_in_year[entries_in_year['q'].notna()]
    entries_in_year = entries_in_year[entries_in_year['k'].notna()]
    print(entries_in_year.head())
    
    det_in_year = entries_in_year['iu_ac'].unique()
    return det_in_year

In [9]:
ldd

Unnamed: 0,iu_ac,t_1h,q,k,hour,day
0,799,2010-01-01 05:00:00,,1.00000,5,2010-01-01
1,799,2010-01-01 06:00:00,,1.00000,6,2010-01-01
2,799,2010-01-01 07:00:00,,0.00000,7,2010-01-01
3,799,2010-01-01 08:00:00,,0.00000,8,2010-01-01
4,799,2010-01-01 09:00:00,,0.00000,9,2010-01-01
...,...,...,...,...,...,...
19231031,7257,2023-12-31 18:00:00,273.0,14.57222,18,2023-12-31
19231032,7257,2023-12-31 19:00:00,250.0,11.96278,19,2023-12-31
19231033,7257,2023-12-31 20:00:00,236.0,15.74833,20,2023-12-31
19231034,7257,2023-12-31 21:00:00,227.0,7.35222,21,2023-12-31


In [10]:
find_detectors_by_year_ldd_delivering_at_least_once_q_or_k(ldd, 2015)

          iu_ac                t_1h   q        k  hour         day
29977028    799 2015-01-01 05:00:00 NaN  0.43667     5  2015-01-01
29977029    799 2015-01-01 06:00:00 NaN  0.43000     6  2015-01-01
29977030    799 2015-01-01 07:00:00 NaN  0.24722     7  2015-01-01
29977031    799 2015-01-01 08:00:00 NaN  0.19556     8  2015-01-01
29977032    799 2015-01-01 09:00:00 NaN  0.38445     9  2015-01-01


array([ 799,  651, 4950, ..., 1222, 1111, 1109])

In [11]:
len(find_detectors_by_year_ldd(ldd, 2015))

3267

In [12]:
changes = []
for year in range(2010, 2024):
    detectors_prev_year = find_detectors_by_year_ldd_delivering_at_least_once_q_or_k(ldd, year - 1)
    detectors_current_year = find_detectors_by_year_ldd_delivering_at_least_once_q_and_k(ldd, year)
    
    det_in_both = np.intersect1d(detectors_prev_year, detectors_current_year)
    det_in_just_prev = np.setdiff1d(detectors_prev_year, det_in_both)
    det_in_just_curr = np.setdiff1d(detectors_current_year, det_in_both)
    
    added_detectors = len(det_in_just_curr) 
    removed_detectors = len(det_in_just_prev) 
    
    print(added_detectors, removed_detectors)
    
    changes.append({'Year': year, 'Number_of_Detectors': len(detectors_current_year), 'Detectors_Added': added_detectors, 'Detectors_Removed': removed_detectors})
    
changes_ldd_at_least_one_q_or_k_df = pd.DataFrame(changes)

Empty DataFrame
Columns: [iu_ac, t_1h, q, k, hour, day]
Index: []
    iu_ac                t_1h      q    k  hour         day
18    651 2010-01-01 05:00:00  318.0  3.0     5  2010-01-01
19    651 2010-01-01 06:00:00  254.0  3.0     6  2010-01-01
20    651 2010-01-01 07:00:00  233.0  3.0     7  2010-01-01
21    651 2010-01-01 08:00:00  161.0  2.0     8  2010-01-01
22    651 2010-01-01 09:00:00  123.0  1.0     9  2010-01-01
1881 0
   iu_ac                t_1h   q    k  hour         day
0    799 2010-01-01 05:00:00 NaN  1.0     5  2010-01-01
1    799 2010-01-01 06:00:00 NaN  1.0     6  2010-01-01
2    799 2010-01-01 07:00:00 NaN  0.0     7  2010-01-01
3    799 2010-01-01 08:00:00 NaN  0.0     8  2010-01-01
4    799 2010-01-01 09:00:00 NaN  0.0     9  2010-01-01
          iu_ac                t_1h       q     k  hour         day
15346476    651 2011-01-03 05:00:00    68.0   1.0     5  2011-01-03
15346477    651 2011-01-03 06:00:00   164.0   2.0     6  2011-01-03
15346478    651 2011-01-03 

KeyboardInterrupt: 

In [13]:
detectors_delivering_data = find_detectors_by_year_ldd_delivering_at_least_once_q_and_k(ldd, 2023)

       iu_ac                t_1h       q         k  hour         day
25800      5 2023-01-01 05:00:00  1403.0  14.10723     5  2023-01-01
25801      5 2023-01-01 06:00:00  1035.0  10.57111     6  2023-01-01
25802      5 2023-01-01 07:00:00   728.0   6.84722     7  2023-01-01
25803      5 2023-01-01 08:00:00   702.0   6.85389     8  2023-01-01
25804      5 2023-01-01 09:00:00   754.0   6.98778     9  2023-01-01


In [14]:
len(detectors_delivering_data)

1406

In [19]:
detectors_delivering_data

np.savetxt(data_path + 'detectors_delivering_data_2023.csv', detectors_delivering_data, delimiter=",", fmt='%d')

In [16]:
detectors_delivering_data.to_csv(data_path + 'detectors_delivering_data_2023.csv', index=False)

AttributeError: 'numpy.ndarray' object has no attribute 'to_csv'

In [None]:
# changes_ldd_at_least_one_q_or_k_df.to_csv(data_path + 'processed_data/detectors_in_ldd_at_least_one_q_or_k_per_year.csv', index=False)

In [29]:
changes_ldd_at_least_one_q_or_k_df

Unnamed: 0,Year,Number_of_Detectors,Detectors_Added,Detectors_Removed
0,2010,1881,1881,0
1,2011,1821,83,718
2,2012,1749,40,740
3,2013,1767,79,672
4,2014,1714,33,737
5,2015,1729,34,701
6,2016,1780,59,641
7,2017,1857,47,655
8,2018,1815,24,730
9,2019,1854,61,705


In [31]:
# Assuming changes_df is already defined and contains the relevant data
changes_ldd_at_least_one_q_or_k_df['Share of detectors delivering value'] = changes_ldd_at_least_one_q_or_k_df.apply(
    lambda row: row['Number_of_Detectors'] / changes_df.loc[changes_df['Year'] == row['Year'], 'Number_of_Detectors'].values[0],
    axis=1
)




NameError: name 'changes_ldd_at_least_one_q_or_k_df' is not defined

In [32]:
changes_ldd_at_least_one_q_or_k_df

Unnamed: 0,Year,Number_of_Detectors,Detectors_Added,Detectors_Removed,Share of detectors delivering value
0,2010,1881,1881,0,0.6
1,2011,1821,83,718,0.577179
2,2012,1749,40,740,0.550866
3,2013,1767,79,672,0.554614
4,2014,1714,33,737,0.531308
5,2015,1729,34,701,0.528746
6,2016,1780,59,641,0.543844
7,2017,1857,47,655,0.56876
8,2018,1815,24,730,0.554876
9,2019,1854,61,705,0.55877
