In [1]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import geopandas as gpd
import regex as re
import json
import random
from pathlib import Path
from tqdm import tqdm
from geopy.geocoders import Nominatim
from shapely.geometry import Polygon, Point


In [2]:
DATA_DIR = Path("data_bomen")
INCIDENT_DATA_PATH = DATA_DIR / 'Incidenten_oorspronkelijk_volledig.csv'
TREE_DATA_PATH =  DATA_DIR / "BOMEN_DATA.csv"
TREE_DATA_WITH_ZIP_PATH = DATA_DIR / "BOMEN_DATA_WITH_ZIP.csv"
ZIPCODE_JSON_PATH = DATA_DIR / "zipcodes_boxes.json"

GRID_SIZE = 200     ## GRID SIZE IN METERS
TREE_DATA_CLEAN_PATH = DATA_DIR / f"tree_geo_data_clean_{str(GRID_SIZE)}.csv"
GRID_DATA_PATH = DATA_DIR / f"grid_enriched_{GRID_SIZE}.csv"
INCIDENTS_WEATHER_PATH = DATA_DIR / "incidents_weather.csv"
INCIDENTS_WEATHER_GEO_PATH = DATA_DIR / f"incidents_weather_geo_{GRID_SIZE}.csv"

POSITIVE_SAMPLES_PATH = DATA_DIR / f"positive_samples{GRID_SIZE}.csv"
NEGATIVE_SAMPLES_PATH = DATA_DIR / f"negative_samples_{GRID_SIZE}.csv"

ZIP_KEY = "Zipcode"
ZIP4_KEY = "Zip4"

DATE_WINDOW = 7

AMSTERDAM_BBOX = (52.26618, 4.64663, 52.475115999999994, 5.150491999999999)

TREE_COLUMNS = [
    "id",
    "soortnaamKort",        # andere soortnamen??
    "boomhoogte",
    "stamdiameter",
    "jaarVanAanleg",
    "typeObject",
    "standplaatsGedetailleerd",
    'SDVIEW',
    "RADIUS",
    "location",
    "grid_id",
]

MAP_BOOMHOOGTE = {
    'a. tot 6 m.' : "0-6",
    'b. 6 tot 9 m.': "6-9",
    'c. 9 tot 12 m.': "9-12",
    'd. 12 tot 15 m.': "12-15",
    'e. 15 tot 18 m.': "15-18",
    'f. 18 tot 24 m.': "18-25",
    'g. 24 m. en hoger': "24",
    'q. Niet van toepassing': "hQ"
}

MAP_STAMDIAMETER = {
    '0,1 tot 0,2 m.': "0.1-0.2",
    '0,2 tot 0,3 m.' : "0.2-0.3",
    '0,3 tot 0,5 m.': "0.3-0.5",
    '0,5 tot 1 m.': "0.5-1.0",
    '1,0 tot 1,5 m.': "1.0-1.5",
    '1,5 m. en grot': "1.5",
    'Onbekend': "dQ",
}

SERVICE_AREAS_OUT_OF_SCOPE = [
    "Amstelveen",
    "Aalsmeer",
    "Uithoorn"
]

RF_INCIDENT_COLUMNS = [
    "Incident_ID",
    "Service_Area",
    "grid_id",
    "Date",
    "Hour",
    "temperature_2m",
    "relative_humidity_2m",
    "dew_point_2m",
    "apparent_temperature",
    "precipitation",
    "rain",
    "snowfall",
    "snow_depth",
    "weather_code",
    "pressure_msl",
    "surface_pressure",
    "wind_speed_10m",
    "wind_direction_10m",
    "wind_gusts_10m",
    "soil_temperature_0_to_7cm",
    "soil_temperature_7_to_28cm",
    "soil_temperature_28_to_100cm",
    "soil_temperature_100_to_255cm",
    "soil_moisture_0_to_7cm",
    "soil_moisture_7_to_28cm",
    "soil_moisture_28_to_100cm",
    "soil_moisture_100_to_255cm",
]

RF_TREE_COLUMNS = [
    "tree_id",
    "grid_id",
    "soortnaamKort",
    "boomhoogte",
    "stamdiameter",
    "jaarVanAanleg",
    "typeObject",
    "standplaatsGedetailleerd",
]

RF_GRID_COLUMNS = [
    "grid_id",
    "has_tree",
    "avg_height",
    "avg_diameter",
    'avg_year',
    'Fraxinus', 
    'Salix', 
    'Alnus', 
    'Quercus', 
    'Tilia', 
    'Acer',
    'Populus', 
    'Betula', 
    'Prunus', 
    'Platanus', 
    'Malus', 
    'Robinia',
    'Crataegus', 
    'Ulmus', 
    'Carpinus', 
    'Overig', 
    'Onbekend'
]

In [3]:
# read storm_data
df = pd.read_csv(INCIDENT_DATA_PATH, sep=",", encoding="utf-8")
df = df.set_index('Incident_ID')

# read tree data
trees = pd.read_csv(TREE_DATA_PATH, sep=",", encoding="utf-8")

In [4]:
incidents_weather_df = pd.read_csv(INCIDENTS_WEATHER_PATH, sep=",", encoding="utf-8")
# Filter on areas in scope
incidents_weather_df = incidents_weather_df[~incidents_weather_df.Service_Area.isin(SERVICE_AREAS_OUT_OF_SCOPE)]


In [5]:
df_tree_incidents = df[df["Damage_Type"]=="Tree"]
df_tree_incidents

Unnamed: 0_level_0,Date,Incident_Starttime,Incident_Endtime,Incident_Duration,Incident_Priority,Service_Area,Municipality,Damage_Type,LON,LAT
Incident_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
511,2018-09-07,08:25:18,10:00:56,01:35:38,2.0,Amstelveen,Amstelveen,Tree,4.838685,52.281552
738,2018-09-10,16:46:38,18:00:55,01:14:17,2.0,Victor,Amsterdam,Tree,4.930968,52.359724
1493,2018-09-21,06:59:05,08:21:25,01:22:20,2.0,Amstelveen,Amstelveen,Tree,4.879741,52.301365
1502,2018-09-21,08:29:22,10:06:43,01:37:21,2.0,Dirk,Amsterdam,Tree,4.870656,52.355189
1507,2018-09-21,09:25:55,11:43:54,02:17:59,2.0,Teunis,Amsterdam,Tree,4.832333,52.380748
...,...,...,...,...,...,...,...,...,...,...
443469,2023-11-02,19:06:07,19:06:38,00:00:31,2.0,Weesp,Amsterdam,Tree,5.076108,52.299105
443472,2023-11-02,19:45:17,20:27:36,00:42:19,2.0,Willem,Amsterdam,Tree,4.908217,52.326775
443474,2023-11-02,19:53:02,23:41:51,03:48:49,2.0,Pieter,Amsterdam,Tree,4.822130,52.342331
443476,2023-11-02,20:00:19,20:52:15,00:51:56,2.0,Osdorp,Amsterdam,Tree,4.787237,52.362777


#### Zipcode shit, doesn't really work or is necessary

In [6]:
# #Extracts the first 4 digits of zipcode '1010 AA' --> '1010' 
# def extract_zip_4(
#     df,
#     zip_col,
#     new_col,
# ):
#     # Strips off last two number from zipcode (1010 AA - > 1010)
#     pattern = r'\d{4}'
#     df[new_col] = [re.match(pattern, zip_code)[0] if isinstance(zip_code, str) else None for zip_code in df[zip_col].values]
#     return df


# def add_zipcode(
#     df
# ):
#     # Adds zipcode based on lat and long coordinate values
#     geolocator = Nominatim(user_agent="my_request")
#     for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing"):
#         Latitude = row['LAT']
#         Longitude = row['LNG']
        
#         location = geolocator.reverse(f"{Latitude},{Longitude}")
#         address = location.raw['address']
    
#         zipcode = str(address.get('postcode'))  
        
#         df.at[index, 'Zipcode'] = zipcode
#     return df

# #Map zips to coordinates
# def map_zip_to_crs(
#     df
# ):
#     geolocator = Nominatim(user_agent="my_request")
#     zipcodes = df['Zipcode'].unique()
#     zip_dict = {}
#     for code in tqdm(zipcodes, desc="Processing ZIP codes"):
#         result = geolocator.geocode(query={'postalcode': code, 'country': 'Nederland'}, addressdetails=True)
#         if result is not None and 'boundingbox' in result.raw:
#             zip_dict[code] = result.raw['boundingbox']

#     with open(ZIPCODE_JSON_PATH, 'w') as f:
#         json.dump(zip_dict, f)

# def find_zip(
#     point,
#     zip_dict
# ):
#     lon, lat = float(point[0]), float(point[1])
#     for zip_code, box in zip_dict.items():
#         min_lat, max_lat, min_lon, max_lon = map(float, box)
#         if min_lat <= lat <= max_lat and min_lon <= lon <= max_lon:
#             return zip_code
#     return None

# def map_point_to_zip(
#     df,
#     zip_dict
# ):
#     for i, row in tqdm(df.iterrows()):
#         p = (row['LNG'], row["LAT"])
#         zip_code = find_zip(p, zip_dict)
#         df.at[i, 'Zipcode'] = zip_code
#     return df

# df_tree_incidents = extract_zip_4(df=df_tree_incidents, zip_col=ZIP_KEY, new_col=ZIP4_KEY)

# # map trees to zipcode

# # takes a while only run if necessary
# # map_zip_to_crs(df=df_tree_incidents)

# # TODO: Werkt niet goed, ergens zit een probleem met de zips

# with open(ZIPCODE_JSON_PATH, 'r') as f:
#     zip_dict = json.load(f)

# trees = map_point_to_zip(trees, zip_dict)
# trees = extract_zip_4(df=trees, zip_col='Zipcode', new_col='Zip4')

# trees.to_csv(TREE_DATA_WITH_ZIP_PATH, sep=",", encoding="utf-8", index=False)

### Grid - Tree mapping

Nog niet perfect maar methode werkt.

In [7]:

geolocator = Nominatim(user_agent="my_geocoder")

# Get coordinates for Amsterdam
location = geolocator.geocode("Amsterdam, Netherlands")
amsterdam_lat, amsterdam_lon = location.latitude, location.longitude

amsterdam_bbox = AMSTERDAM_BBOX

# Calculate grid bounds
lat_step = GRID_SIZE / 111000  # 1 degree of latitude is approximately 111 kilometers
lon_step = (GRID_SIZE / 111000) / np.cos(np.radians(amsterdam_lat))  # Correct for latitude

grid_polygons = []
for lat in np.arange(amsterdam_bbox[0], amsterdam_bbox[2], lat_step):
    for lon in np.arange(amsterdam_bbox[1], amsterdam_bbox[3], lon_step):
        polygon = Polygon([
            (lon, lat),
            (lon + lon_step, lat),
            (lon + lon_step, lat + lat_step),
            (lon, lat + lat_step),
            (lon, lat),
        ])
        grid_polygons.append(polygon)

grid_gdf = gpd.GeoDataFrame(geometry=grid_polygons, crs="EPSG:4326")

In [8]:
# create gdf from trees
tree_gdf = gpd.GeoDataFrame(trees, geometry=gpd.points_from_xy(trees['LNG'], trees['LAT']), crs="EPSG:4326")
# create gdf from indicents
incident_gdf = gpd.GeoDataFrame(incidents_weather_df, geometry=gpd.points_from_xy(incidents_weather_df['LON'], incidents_weather_df['LAT']), crs="EPSG:4326")
#join with grid gdf
tree_gdf = gpd.sjoin(tree_gdf, grid_gdf, how="left", op="within")
incident_gdf = gpd.sjoin(incident_gdf, grid_gdf, how="left", op="within")


  if await self.run_code(code, result, async_=asy):
  if await self.run_code(code, result, async_=asy):


In [9]:
#clean up gdf
tree_gdf = tree_gdf.rename(columns={"index_right" : "grid_id", "geometry" : "location"})
incident_gdf = incident_gdf.rename(columns={"index_right" : "grid_id", "geometry" : "location"})

#rename categories in new col, in place so only run once
tree_gdf['boomhoogte'] = [MAP_BOOMHOOGTE[klasse] if not klasse is np.nan else np.nan for klasse in tree_gdf.boomhoogteklasseActueel.values]
tree_gdf['stamdiameter'] = [MAP_STAMDIAMETER[klasse] if not klasse is np.nan else np.nan for klasse in tree_gdf.stamdiameterklasse.values]

# get rid of unnecessary columns
tree_gdf = tree_gdf[TREE_COLUMNS]

# save to new df
tree_gdf.to_csv(TREE_DATA_CLEAN_PATH, sep=",", encoding="utf-8", index=False)

In [10]:
import plotly.express as px
def plot_spacial_data(
    grid_gdf,
    tree_gdf,
    incident_gdf,
    plot_trees = True,
    plot_incidents = True
):
    # Create a plotly figure
    fig = px.choropleth_mapbox(grid_gdf, 
                                geojson=grid_gdf.geometry.__geo_interface__, 
                                locations=grid_gdf.index,
                                mapbox_style="open-street-map",
                                zoom=11, center={"lat": amsterdam_bbox[0], "lon": amsterdam_bbox[1]},
                                opacity=0.1,
                                )

    if plot_trees:
        # Add scatter plot for tree points
        fig.add_scattermapbox(
            lat=tree_gdf.location.y,
            lon=tree_gdf.location.x,
            mode='markers',
            marker=dict(
                size=4,
                color='green',
                opacity=0.7,
            ),
            text=tree_gdf['tree_id'].astype(str),
            name='Trees'
        )
    if plot_incidents:
        # Add scatter plot for tree points
        fig.add_scattermapbox(
            lat=incident_gdf.location.y,
            lon=incident_gdf.location.x,
            mode='markers',
            marker=dict(
                size=4,
                color='red',
                opacity=0.7,
            ),
            text=incident_gdf.Incident_ID.astype(str),
            name='Incidents'
        )
    # Update the layout to make it interactive
    fig.update_layout(mapbox_style="carto-positron")
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.show()
    

### Enrich grid gdf

In [11]:

''' 
grid gdf cols
grid_id, geometery, has_tree, [tree_counts], [averages] 
'''

def convert_cat_to_avg(
    cat_values,
    delimeter = "-"
):
    ''' 
    Converts categorical values to means of type float
    Splits cat values on delimter, computes the mean for each cat
    Returns mean of all means of the categories
    '''
    means = []
    for cat in cat_values:
        if not isinstance(cat, str):
            continue
        if not delimeter in cat:
            continue
        vals = cat.split(delimeter)
        means.append(np.mean([float(val) for val in vals]))
    m = round(np.mean(means), 3)
    return 0 if np.isnan(m) else m


def enrich_grid_df(
    grid_gdf,
    tree_gdf
):
    for i in grid_gdf.index:
        tree_sub_df = tree_gdf[tree_gdf.grid_id == i]
        if len(tree_sub_df)>0:
            # Compute and add averages for height, diameter and year
            grid_gdf.at[i, "avg_height"] = convert_cat_to_avg(tree_sub_df.boomhoogte.values)
            grid_gdf.at[i, "avg_diameter"] = convert_cat_to_avg(tree_sub_df.stamdiameter.values)
            grid_gdf.at[i, "avg_year"] = round(np.mean(tree_sub_df.jaarVanAanleg.values), 3)
            # Add soortnaam counts
            for name, count in tree_sub_df.soortnaamKort.value_counts().items():
                grid_gdf.at[i, "has_tree"] = True
                grid_gdf.at[i, name] = count
        else:
            grid_gdf.at[i, "has_tree"] = False


enrich_grid_df(grid_gdf=grid_gdf, tree_gdf=tree_gdf)

  grid_gdf.at[i, "has_tree"] = False
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rco

In [12]:
incident_gdf.to_csv(INCIDENTS_WEATHER_GEO_PATH, sep=",", encoding="utf-8", index=False)

In [13]:
grid_gdf = grid_gdf.fillna(0)
grid_gdf[grid_gdf.has_tree == True]
grid_gdf['grid_id'] = grid_gdf.index

grid_gdf.to_csv(GRID_DATA_PATH, sep=",", encoding="utf-8", index=False)

  grid_gdf = grid_gdf.fillna(0)


### Create train / test sets

In [14]:
def display_full_df(
    df
):
    pd.set_option('display.max_rows', None)
    print(df.to_string(index=False))
    pd.reset_option('display.max_rows')

In [15]:
#convert dates to datetime objects
incident_gdf.Date = pd.to_datetime(incident_gdf.Date)

In [16]:
# Pick necessary columns
incident_sub_gdf = incident_gdf[RF_INCIDENT_COLUMNS]

grid_sub_gdf = grid_gdf[RF_GRID_COLUMNS]

tree_gdf = tree_gdf.rename(columns={"id" : "tree_id"})
tree_sub_gdf = tree_gdf[RF_TREE_COLUMNS]

In [17]:
positive_samples = grid_sub_gdf.merge(incident_sub_gdf, on='grid_id', how='inner')

In [18]:
positive_samples.to_csv(POSITIVE_SAMPLES_PATH, sep=",", encoding="utf-8", index=False)

In [19]:
display_full_df(positive_samples)

 grid_id  has_tree  avg_height  avg_diameter  avg_year  Fraxinus  Salix  Alnus  Quercus  Tilia  Acer  Populus  Betula  Prunus  Platanus  Malus  Robinia  Crataegus  Ulmus  Carpinus  Overig  Onbekend  Incident_ID              Service_Area       Date  Hour  temperature_2m  relative_humidity_2m  dew_point_2m  apparent_temperature  precipitation  rain  snowfall  snow_depth  weather_code  pressure_msl  surface_pressure  wind_speed_10m  wind_direction_10m  wind_gusts_10m  soil_temperature_0_to_7cm  soil_temperature_7_to_28cm  soil_temperature_28_to_100cm  soil_temperature_100_to_255cm  soil_moisture_0_to_7cm  soil_moisture_7_to_28cm  soil_moisture_28_to_100cm  soil_moisture_100_to_255cm
    1853      True      10.500         0.400  1984.000       0.0    0.0    1.0      0.0    0.0   0.0      0.0     0.0     0.0       0.0    0.0      0.0        0.0    0.0       0.0     0.0       0.0       394418                     Weesp 2022-02-21    12        8.243500             76.970634      4.443500      

In [20]:
#TODO: change date window
def verify_sample(
    incidents,
    grid_id,
    date,
    window = DATE_WINDOW
):
    start_date = date - pd.DateOffset(days=window)
    end_date = date + pd.DateOffset(days=window)

    grids = incidents[(incidents['Date'] >= start_date) & (incidents['Date'] <= end_date)].values

    return False if grid_id not in grids else True


def sample_negatives(
    positives,
    incidents,
    grid
):
    grids_with_trees = list(grid[grid.has_tree == True].grid_id.values)
    negatives = positives[['Date', 'Hour']]
    negatives[RF_GRID_COLUMNS] = None

    for i, row in negatives.iterrows():
        random_grid = random.sample(grids_with_trees, 1)[0]
        while(verify_sample(incidents, random_grid, row.Date)):
            random_grid = random.sample(grids_with_trees, 1)[0]
        grid_data = grid[grid.grid_id == random_grid][RF_GRID_COLUMNS].reset_index(drop=True)
        negatives.loc[i, RF_GRID_COLUMNS] = grid_data.iloc[0]

    return negatives


negative_samples = sample_negatives(positive_samples, incident_sub_gdf, grid_sub_gdf)
negative_samples.to_csv(NEGATIVE_SAMPLES_PATH, sep=",", encoding="utf-8", index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  negatives[RF_GRID_COLUMNS] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  negatives[RF_GRID_COLUMNS] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  negatives[RF_GRID_COLUMNS] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_index

In [21]:
from GetWeather import GetWeather

weather_getter = GetWeather(grid_path=GRID_DATA_PATH, samples_path=NEGATIVE_SAMPLES_PATH, sleep_time=60)  

grid_df = pd.read_csv("data_bomen/grid_enriched_200.csv")


negative_samples = weather_getter.add_weather_data()

  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  return bound(*args, **kwds)


Splitting data in 10
Getting data for subsplit 0, length is 197
Took 12.858140230178833 seconds
Waiting for 60 seconds...
Getting data for subsplit 1, length is 197
Took 19.406673192977905 seconds
Waiting for 60 seconds...
Getting data for subsplit 2, length is 197
Took 10.710057020187378 seconds
Waiting for 60 seconds...
Getting data for subsplit 3, length is 197
Took 12.481285810470581 seconds
Waiting for 60 seconds...
Getting data for subsplit 4, length is 196
Took 12.351675033569336 seconds
Waiting for 60 seconds...
Getting data for subsplit 5, length is 196
Took 11.029696941375732 seconds
Waiting for 60 seconds...
Sleeping for an hour


KeyboardInterrupt: 

In [None]:
negative_samples

Unnamed: 0,Date,grid_id,LAT,LON,Hour,has_tree,avg_height,avg_diameter,avg_year,Fraxinus,...,wind_direction_100m,wind_gusts_10m,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
0,2022-02-21,12087.0,52.393207,4.993404,12.0,True,10.460,0.750,1217.173,15.0,...,,,,,,,,,,
1,2017-09-13,11203.0,52.384198,4.907817,15.0,True,14.000,0.443,1813.671,2.0,...,,,,,,,,,,
2,2023-11-02,10334.0,52.375189,4.866500,17.0,True,10.077,0.325,1984.441,0.0,...,,,,,,,,,,
3,2022-11-17,2512.0,52.292306,4.996356,2.0,True,0.000,0.000,0.000,0.0,...,,,,,,,,,,
4,2023-07-05,12193.0,52.395009,4.801572,11.0,True,7.355,0.176,2006.355,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1959,,,,,,,,,,,...,267.510498,28.080000,20.536999,16.636999,15.887000,10.887000,0.593,0.527,0.536,0.660
1960,,,,,,,,,,,...,141.842728,11.159999,23.967501,21.417501,16.667501,12.917500,0.446,0.506,0.475,0.600
1961,,,,,,,,,,,...,268.999329,104.760002,13.217500,16.467501,15.967500,10.767500,0.727,0.509,0.547,0.666
1962,,,,,,,,,,,...,268.903595,97.919998,13.028501,16.478498,15.928500,10.778501,0.733,0.474,0.538,0.661


ValueError: cannot reindex on an axis with duplicate labels