In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import os
from shapely.geometry import LineString
import gemgis as gg

In [2]:

file_path = "C:/Users/chimi/Desktop/Python Data Science Projects/Urban Modeling Project/Predictive maintenance Model Data/Weather Dataset/annual_precip.csv"
df = pd.read_csv(file_path, skiprows=4)


In [3]:
df.head(5)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,Aruba,ABW,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,,,,,...,,,,,,,,,,
1,Africa Eastern and Southern,AFE,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,,,,,...,,,,,,,,,,
2,Afghanistan,AFG,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,327.0,327.0,327.0,327.0,327.0,...,327.0,327.0,327.0,327.0,327.0,327.0,327.0,327.0,,
3,Africa Western and Central,AFW,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,,,,,...,,,,,,,,,,
4,Angola,AGO,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,1010.0,1010.0,1010.0,1010.0,1010.0,...,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0,,


In [4]:
# Filter in countries
countries_to_filter = ['Burundi','Angola','Cameroon','Ethiopia','Kenya','South Africa']
annualprecip_bycountries = df[df['Country Name'].isin(countries_to_filter)]

In [5]:
annualprecip_bycountries.head(5)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
4,Angola,AGO,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,1010.0,1010.0,1010.0,1010.0,1010.0,...,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0,,
16,Burundi,BDI,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,1274.0,1274.0,1274.0,1274.0,1274.0,...,1274.0,1274.0,1274.0,1274.0,1274.0,1274.0,1274.0,1274.0,,
42,Cameroon,CMR,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,1604.0,1604.0,1604.0,1604.0,1604.0,...,1604.0,1604.0,1604.0,1604.0,1604.0,1604.0,1604.0,1604.0,,
72,Ethiopia,ETH,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,,,,,,...,848.0,848.0,848.0,848.0,848.0,848.0,848.0,848.0,,
121,Kenya,KEN,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,,630.0,630.0,630.0,630.0,630.0,...,630.0,630.0,630.0,630.0,630.0,630.0,630.0,630.0,,


In [6]:
# Melt the data
melted_data = annualprecip_bycountries.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'], var_name='Year', value_name='Annual Precipitation')

In [7]:
melted_data.head(5)

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,Year,Annual Precipitation
0,Angola,AGO,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,1960,
1,Burundi,BDI,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,1960,
2,Cameroon,CMR,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,1960,
3,Ethiopia,ETH,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,1960,
4,Kenya,KEN,Average precipitation in depth (mm per year),AG.LND.PRCP.MM,1960,


In [8]:
melted_data['Indicator Name'].value_counts()

Average precipitation in depth (mm per year)    372
Name: Indicator Name, dtype: int64

In [9]:
# Renaming the column indicator name column to Average precipitation in depth (mm per year) 
melted_data.drop(columns=['Indicator Name','Indicator Code'])

Unnamed: 0,Country Name,Country Code,Year,Annual Precipitation
0,Angola,AGO,1960,
1,Burundi,BDI,1960,
2,Cameroon,CMR,1960,
3,Ethiopia,ETH,1960,
4,Kenya,KEN,1960,
...,...,...,...,...
367,Burundi,BDI,2021,
368,Cameroon,CMR,2021,
369,Ethiopia,ETH,2021,
370,Kenya,KEN,2021,


In [10]:
melted_data.info

<bound method DataFrame.info of      Country Name Country Code                                Indicator Name  \
0          Angola          AGO  Average precipitation in depth (mm per year)   
1         Burundi          BDI  Average precipitation in depth (mm per year)   
2        Cameroon          CMR  Average precipitation in depth (mm per year)   
3        Ethiopia          ETH  Average precipitation in depth (mm per year)   
4           Kenya          KEN  Average precipitation in depth (mm per year)   
..            ...          ...                                           ...   
367       Burundi          BDI  Average precipitation in depth (mm per year)   
368      Cameroon          CMR  Average precipitation in depth (mm per year)   
369      Ethiopia          ETH  Average precipitation in depth (mm per year)   
370         Kenya          KEN  Average precipitation in depth (mm per year)   
371  South Africa          ZAF  Average precipitation in depth (mm per year)   

     In

In [11]:
melted_data.dtypes

Country Name             object
Country Code             object
Indicator Name           object
Indicator Code           object
Year                     object
Annual Precipitation    float64
dtype: object

In [12]:
# Change the data types
if melted_data['Year'].dtype == 'object' or melted_data['Year'].dtype == 'str':
    melted_data['Year'] = pd.to_numeric(melted_data['Year'], errors='coerce')

In [13]:
# Filter in from year 2000 to 2021
filtered_data = melted_data[(melted_data['Year'] >= 2000) & (melted_data['Year'] <= 2021)]

In [14]:
# Check for missing values
filtered_data.isnull().sum()

Country Name             0
Country Code             0
Indicator Name           0
Indicator Code           0
Year                     0
Annual Precipitation    12
dtype: int64

In [15]:
# Drop the missing values
filtered_data = filtered_data.dropna()

In [16]:
# Make sure there are no null values
filtered_data.isnull().sum()

Country Name            0
Country Code            0
Indicator Name          0
Indicator Code          0
Year                    0
Annual Precipitation    0
dtype: int64

In [17]:
# Calculate the average precipitation by country from 2000 to 2021
average_precip = filtered_data.groupby(['Country Name']).mean().reset_index()

In [18]:
# Total number of hours in a year
hours_per_year = 24 * 365

average_precip['Average Precipitation Rate(mm/h by year)'] = average_precip['Annual Precipitation'] / hours_per_year
average_precip

Unnamed: 0,Country Name,Year,Annual Precipitation,Average Precipitation Rate(mm/h by year)
0,Angola,2009.5,1010.0,0.12
1,Burundi,2009.5,1271.2,0.15
2,Cameroon,2009.5,1604.0,0.18
3,Ethiopia,2009.5,848.0,0.1
4,Kenya,2009.5,633.1,0.07
5,South Africa,2009.5,495.0,0.06


In [19]:
average_precip.drop(columns=['Year'],axis=0,inplace=True)

In [20]:
# Rename the Country Name column to Countru
average_precip.rename(columns={'Country Name':'Country'},inplace=True)

In [21]:
# Final average_precip
average_precip

Unnamed: 0,Country,Annual Precipitation,Average Precipitation Rate(mm/h by year)
0,Angola,1010.0,0.12
1,Burundi,1271.2,0.15
2,Cameroon,1604.0,0.18
3,Ethiopia,848.0,0.1
4,Kenya,633.1,0.07
5,South Africa,495.0,0.06


In [22]:
# Read the road dataset
# Calculating PCI index
file_path = r"C:\Users\chimi\Desktop\Python Data Science Projects\Urban Modeling Project\Predictive maintenance Model Data\Roads\roadmain_data.csv"
roads_data = pd.read_csv(file_path)

In [23]:
roads_data.shape

(12283, 15)

In [24]:
roads_data.columns

Index(['REGION', 'ROADNO', 'STARTDESC', 'ENDDESC', 'LENGTHKM', 'WIDTH', 'AADT',
       'PAVETYPE', 'CONDITION', 'LANES', 'geometry', 'Country', 'SURFTYP',
       'CLASS', 'slope'],
      dtype='object')

In [25]:
roads_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12283 entries, 0 to 12282
Data columns (total 15 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   REGION     12283 non-null  object 
 1   ROADNO     12282 non-null  object 
 2   STARTDESC  12279 non-null  object 
 3   ENDDESC    12277 non-null  object 
 4   LENGTHKM   12283 non-null  float64
 5   WIDTH      12283 non-null  float64
 6   AADT       12283 non-null  float64
 7   PAVETYPE   12283 non-null  object 
 8   CONDITION  12283 non-null  object 
 9   LANES      12283 non-null  float64
 10  geometry   12283 non-null  object 
 11  Country    12283 non-null  object 
 12  SURFTYP    12236 non-null  object 
 13  CLASS      10562 non-null  object 
 14  slope      12283 non-null  float64
dtypes: float64(5), object(10)
memory usage: 1.4+ MB


In [26]:
merged_data = roads_data.merge(average_precip[['Country', 'Average Precipitation Rate(mm/h by year)']], on='Country', how='left')

In [27]:
merged_data.head(5)

Unnamed: 0,REGION,ROADNO,STARTDESC,ENDDESC,LENGTHKM,WIDTH,AADT,PAVETYPE,CONDITION,LANES,geometry,Country,SURFTYP,CLASS,slope,Average Precipitation Rate(mm/h by year)
0,Bujumbura-Mairie,RN7,Bujumbura,Nyakarara,4.18,6.0,524.0,Asphalt,Bad,2.0,LINESTRING (12.965945581671459 -7.041666075978...,Burundi,Paved,,0.88,0.15
1,Bujumbura-Rural,RN7,Bujumbura,Nyakarara,16.29,6.0,524.0,Asphalt,Bad,2.0,LINESTRING (13.59030560021181 -8.5844107579491...,Burundi,Paved,,0.68,0.15
2,Bururi,RN7,Unknown,jct RN 16,21.41,6.0,75.0,Asphalt,Good,2.0,LINESTRING (13.7307457308967 -7.83080970764367...,Burundi,Paved,,0.4,0.15
3,Gitega,RN7,Unknown,jct RN 16,2.33,6.0,75.0,Asphalt,Good,2.0,LINESTRING (12.97150629518783 -7.6364261839293...,Burundi,Paved,,0.54,0.15
4,Bururi,RN7,jct RN 16,jct RN 8,17.08,6.0,136.0,Asphalt,Good,2.0,LINESTRING (13.702648452544794 -9.284267848298...,Burundi,Paved,,1.88,0.15


In [28]:
merged_data['Average Precipitation Rate(mm/h by year)'].value_counts()

0.07    8275
0.06    1803
0.18    1443
0.12     408
0.15     278
0.10      76
Name: Average Precipitation Rate(mm/h by year), dtype: int64

In [29]:
merged_data['PAVETYPE'].value_counts()

Earth                  4150
Gravel                 3098
FLEX                   1490
Earth Road             1224
Surface Dressing        719
Asphalt                 660
Premix                  260
Unknown                 246
Track                   213
JPCC                     57
Gravel Road              47
Concrete (Jt-Plain)      36
Gap                      31
Concrete Block           13
CRCP                     10
AC                        8
Sand                      7
Paved                     5
Unpaved                   4
ST                        3
Set Stone                 2
Name: PAVETYPE, dtype: int64

In [30]:
# Drop the unknown surface type rows
merged_data.drop(merged_data[merged_data['PAVETYPE'].isin(['Unknown', ''])].index, inplace=True)

In [31]:
# Make sure unknown rows were dropped
merged_data['PAVETYPE'].value_counts()

Earth                  4150
Gravel                 3098
FLEX                   1490
Earth Road             1224
Surface Dressing        719
Asphalt                 660
Premix                  260
Track                   213
JPCC                     57
Gravel Road              47
Concrete (Jt-Plain)      36
Gap                      31
Concrete Block           13
CRCP                     10
AC                        8
Sand                      7
Paved                     5
Unpaved                   4
ST                        3
Set Stone                 2
Name: PAVETYPE, dtype: int64

In [32]:
# # Calculate Runoff Coefficents :https://www.researchgate.net/figure/The-runoff-coefficient-of-different-land-types_tbl1_276044494
#https://www.researchgate.net/figure/Guidelines-for-surface-runoff-coefficients-by-the-Ministry-of-Education-Culture-Sports_tbl1_239789469
# These are standard rational coefficents complied  from those sites
surface_types = {
    'Earth': 0.4,
    'Earth Road': 0.4,
    'Track': 0.3,
    'Gravel': 0.5,
    'Gravel Road': 0.5,
    'Sand': 0.25,
    'ST (Set Stone)': 0.35 ,
    'Asphalt': 0.90,
    'AC':0.9,
    'JPCC': 0.85,
    'Concrete (Jt-Plain)': 0.85,
    'CRCP': 0.85,
    'FLEX': 0.85,
    'Premix': 0.85,
    'Surface Dressing': 0.85,
    'Concrete Block': 0.85,
    'Paved': 0.85,
    'Unpaved' : 0.25,
    'ST' : 0.85,
    'Set Stone': 0.85,
    'Gap':0.85
}


In [33]:
merged_data['runfoff_coef'] = merged_data['PAVETYPE'].replace(surface_types)

In [34]:
merged_data['runfoff_coef'].value_counts()

0.40    5374
0.50    3145
0.85    2626
0.90     668
0.30     213
0.25      11
Name: runfoff_coef, dtype: int64

In [35]:
merged_data.head(5)

Unnamed: 0,REGION,ROADNO,STARTDESC,ENDDESC,LENGTHKM,WIDTH,AADT,PAVETYPE,CONDITION,LANES,geometry,Country,SURFTYP,CLASS,slope,Average Precipitation Rate(mm/h by year),runfoff_coef
0,Bujumbura-Mairie,RN7,Bujumbura,Nyakarara,4.18,6.0,524.0,Asphalt,Bad,2.0,LINESTRING (12.965945581671459 -7.041666075978...,Burundi,Paved,,0.88,0.15,0.9
1,Bujumbura-Rural,RN7,Bujumbura,Nyakarara,16.29,6.0,524.0,Asphalt,Bad,2.0,LINESTRING (13.59030560021181 -8.5844107579491...,Burundi,Paved,,0.68,0.15,0.9
2,Bururi,RN7,Unknown,jct RN 16,21.41,6.0,75.0,Asphalt,Good,2.0,LINESTRING (13.7307457308967 -7.83080970764367...,Burundi,Paved,,0.4,0.15,0.9
3,Gitega,RN7,Unknown,jct RN 16,2.33,6.0,75.0,Asphalt,Good,2.0,LINESTRING (12.97150629518783 -7.6364261839293...,Burundi,Paved,,0.54,0.15,0.9
4,Bururi,RN7,jct RN 16,jct RN 8,17.08,6.0,136.0,Asphalt,Good,2.0,LINESTRING (13.702648452544794 -9.284267848298...,Burundi,Paved,,1.88,0.15,0.9


In [36]:
merged_data.tail(5)

Unnamed: 0,REGION,ROADNO,STARTDESC,ENDDESC,LENGTHKM,WIDTH,AADT,PAVETYPE,CONDITION,LANES,geometry,Country,SURFTYP,CLASS,slope,Average Precipitation Rate(mm/h by year),runfoff_coef
12278,KwaZulu-Natal,R66,Gingindlovu,Nkwalini,48.91,12.8,0.0,FLEX,Unknown,2.0,"LINESTRING (31.61473812 -29.07737001, 31.61465...",South Africa,Paved,Primary,inf,0.06,0.85
12279,KwaZulu-Natal,R66,Gingindlovu,Nkwalini,48.91,12.8,0.0,FLEX,Unknown,2.0,LINESTRING (31.59042590231682 -29.030550211759...,South Africa,Paved,Primary,-6.5,0.06,0.85
12280,KwaZulu-Natal,N2,Hibberdene I/C,Mtwalume I/C,8.44,20.2,11885.0,FLEX,Very Good,2.0,"LINESTRING (30.57512688 -30.56287698, 30.57521...",South Africa,Paved,Primary,-1.09,0.06,0.85
12281,KwaZulu-Natal,N2,Hibberdene I/C,Mtwalume I/C,8.42,20.2,11885.0,FLEX,Very Good,2.0,"LINESTRING (30.574397159999997 -30.56320602, 3...",South Africa,Paved,Primary,-1.33,0.06,0.85
12282,KwaZulu-Natal,N2,Marburg,Umtentweni I/C,7.47,20.2,5807.0,FLEX,Very Good,2.0,"LINESTRING (30.43110492 -30.75247503, 30.43117...",South Africa,Paved,Primary,-2.45,0.06,0.85


In [37]:
# Apply rating scale to road condition
def apply_rating(rating):
    if rating == 'Excellent':
        return 95
    elif rating == 'Very Good':
        return 85
    elif rating == 'Good':
        return 75
    elif rating == 'Fair':
        return 60
    elif rating == 'Poor':
        return 45
    elif rating == 'Very Poor':
        return 30
    else:
        return 0
        
# Assign a lower PCI score to road segments with the same rating as "Poor" or "Fair"
merged_data['PCI'] = merged_data['CONDITION'].apply(apply_rating)
merged_data['MAINTENANCE_SCORE'] = merged_data['CONDITION'].apply(apply_rating)

In [38]:
merged_data

Unnamed: 0,REGION,ROADNO,STARTDESC,ENDDESC,LENGTHKM,WIDTH,AADT,PAVETYPE,CONDITION,LANES,geometry,Country,SURFTYP,CLASS,slope,Average Precipitation Rate(mm/h by year),runfoff_coef,PCI,MAINTENANCE_SCORE
0,Bujumbura-Mairie,RN7,Bujumbura,Nyakarara,4.18,6.00,524.00,Asphalt,Bad,2.00,LINESTRING (12.965945581671459 -7.041666075978...,Burundi,Paved,,0.88,0.15,0.90,0,0
1,Bujumbura-Rural,RN7,Bujumbura,Nyakarara,16.29,6.00,524.00,Asphalt,Bad,2.00,LINESTRING (13.59030560021181 -8.5844107579491...,Burundi,Paved,,0.68,0.15,0.90,0,0
2,Bururi,RN7,Unknown,jct RN 16,21.41,6.00,75.00,Asphalt,Good,2.00,LINESTRING (13.7307457308967 -7.83080970764367...,Burundi,Paved,,0.40,0.15,0.90,75,75
3,Gitega,RN7,Unknown,jct RN 16,2.33,6.00,75.00,Asphalt,Good,2.00,LINESTRING (12.97150629518783 -7.6364261839293...,Burundi,Paved,,0.54,0.15,0.90,75,75
4,Bururi,RN7,jct RN 16,jct RN 8,17.08,6.00,136.00,Asphalt,Good,2.00,LINESTRING (13.702648452544794 -9.284267848298...,Burundi,Paved,,1.88,0.15,0.90,75,75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12278,KwaZulu-Natal,R66,Gingindlovu,Nkwalini,48.91,12.80,0.00,FLEX,Unknown,2.00,"LINESTRING (31.61473812 -29.07737001, 31.61465...",South Africa,Paved,Primary,inf,0.06,0.85,0,0
12279,KwaZulu-Natal,R66,Gingindlovu,Nkwalini,48.91,12.80,0.00,FLEX,Unknown,2.00,LINESTRING (31.59042590231682 -29.030550211759...,South Africa,Paved,Primary,-6.50,0.06,0.85,0,0
12280,KwaZulu-Natal,N2,Hibberdene I/C,Mtwalume I/C,8.44,20.20,11885.00,FLEX,Very Good,2.00,"LINESTRING (30.57512688 -30.56287698, 30.57521...",South Africa,Paved,Primary,-1.09,0.06,0.85,85,85
12281,KwaZulu-Natal,N2,Hibberdene I/C,Mtwalume I/C,8.42,20.20,11885.00,FLEX,Very Good,2.00,"LINESTRING (30.574397159999997 -30.56320602, 3...",South Africa,Paved,Primary,-1.33,0.06,0.85,85,85


In [39]:
# Let combine this with the weather data
file_path = r'C:\Users\chimi\Desktop\Python Data Science Projects\Urban Modeling Project\Predictive maintenance Model Data\Weather Dataset\Weather data Jan 2017 to April 18 2023.csv'
weather_df = pd.read_csv(file_path)

In [40]:
weather_df.head(5)

Unnamed: 0,name,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,...,solarenergy,uvindex,severerisk,sunrise,sunset,moonphase,conditions,description,icon,stations
0,Angola,2017-01-01,30.0,24.0,26.7,35.1,24.0,28.4,23.2,81.4,...,,,,2017-01-01T05:51:57,2017-01-01T18:29:26,0.11,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,"FNLU,66160099999"
1,Angola,2017-01-02,31.0,25.8,27.4,36.4,25.8,29.5,23.2,78.0,...,,,,2017-01-02T05:52:29,2017-01-02T18:29:50,0.15,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,"FNLU,66160099999"
2,Angola,2017-01-03,31.0,24.0,27.5,35.5,24.0,30.1,23.3,78.6,...,,,,2017-01-03T05:53:01,2017-01-03T18:30:13,0.18,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,"FNLU,66160099999"
3,Angola,2017-01-04,32.0,25.8,27.9,36.8,25.8,30.8,23.7,78.6,...,,,,2017-01-04T05:53:33,2017-01-04T18:30:35,0.22,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,"FNLU,66160099999"
4,Angola,2017-01-05,29.0,24.0,26.0,33.6,24.0,27.0,23.3,85.5,...,,,,2017-01-05T05:54:04,2017-01-05T18:30:57,0.25,Partially cloudy,Partly cloudy throughout the day.,partly-cloudy-day,"FNLU,66160099999"


In [41]:
weather_df['name'].value_counts()

Angola          2299
South Africa    2299
Kenya           2299
Cameroon        2299
Ethiopia        2299
Burundi         2299
Name: name, dtype: int64

In [42]:
weather_df.columns

Index(['name', 'datetime', 'tempmax', 'tempmin', 'temp', 'feelslikemax',
       'feelslikemin', 'feelslike', 'dew', 'humidity', 'precip', 'precipprob',
       'precipcover', 'preciptype', 'snow', 'snowdepth', 'windgust',
       'windspeed', 'winddir', 'sealevelpressure', 'cloudcover', 'visibility',
       'solarradiation', 'solarenergy', 'uvindex', 'severerisk', 'sunrise',
       'sunset', 'moonphase', 'conditions', 'description', 'icon', 'stations'],
      dtype='object')

In [43]:
weather_df.shape

(13794, 33)

In [44]:
# Rename the name column
weather_df.rename(columns={'name':'Country'},inplace=True)

In [45]:
weather_df.isnull().sum()

Country                 0
datetime                0
tempmax               225
tempmin               225
temp                  225
feelslikemax          225
feelslikemin          225
feelslike             225
dew                   227
humidity              225
precip               3925
precipprob            225
precipcover             0
preciptype          10444
snow                 3767
snowdepth            3769
windgust             9008
windspeed               0
winddir               591
sealevelpressure      505
cloudcover            381
visibility            252
solarradiation       9545
solarenergy          9545
uvindex              9545
severerisk          11010
sunrise                 0
sunset                  0
moonphase               0
conditions            225
description           225
icon                  225
stations              225
dtype: int64

In [46]:
# Drop some of the empty columns 
weather_df.drop(columns=['precip','preciptype','snow','snowdepth','windgust',
                       'solarradiation','solarenergy','uvindex','severerisk','sunset','sunrise',
                       'moonphase','conditions','description','icon','stations'],inplace=True)

In [52]:
# Merge the files
weather_df.columns

Index(['Country', 'datetime', 'tempmax', 'tempmin', 'temp', 'feelslikemax',
       'feelslikemin', 'feelslike', 'dew', 'humidity', 'precipprob',
       'precipcover', 'windspeed', 'winddir', 'sealevelpressure', 'cloudcover',
       'visibility'],
      dtype='object')

In [48]:
# Filter in 3 year columns / 5 year file is too large
#start_date = '2022-01-01'
#end_date = '2023-04-03'
#filtered_data = weather_df.loc[(weather_df['datetime'] >= start_date) & 
                               (weather_df['datetime'] <= end_date)]

In [49]:
# filtered_data.head(5)

Unnamed: 0,Country,datetime,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,precipprob,precipcover,windspeed,winddir,sealevelpressure,cloudcover,visibility
1826,Angola,2022-01-01,29.0,24.0,26.1,32.7,24.0,27.3,22.9,83.2,0.0,0.0,18.4,228.8,1009.7,57.9,9.6
1827,Angola,2022-01-02,28.0,25.0,25.8,31.9,25.0,26.7,23.2,85.6,0.0,0.0,20.5,198.1,1009.1,71.6,9.6
1828,Angola,2022-01-03,29.0,24.0,26.4,32.0,24.0,27.8,23.3,83.5,0.0,0.0,16.6,238.3,1008.9,64.6,9.9
1829,Angola,2022-01-04,28.0,24.0,25.9,31.2,24.0,26.9,23.1,84.9,0.0,0.0,16.6,199.3,1010.5,87.0,9.9
1830,Angola,2022-01-05,29.0,23.0,25.9,32.0,23.0,26.9,22.0,80.2,0.0,0.0,16.6,214.3,1010.0,52.7,9.3


In [50]:
#merged_df = pd.merge(merged_data, filtered_data, on='Country')

In [None]:
#merged_df.to_csv('finalroads_data.csv', index=False)

In [None]:
# Calculating Annual Data From 5 year
# File too large for labtop  to handle it

In [None]:
# Calculate average tempmax	tempmin	temp	feelslikemax	feelslikemin	feelslike	dew	humidity	precipprob	precipcover	windspeed
# winddir	sealevelpressure	cloudcover	visibility from Jan 2017 to April 2023

In [76]:
countries_5yearavg = weather_df.groupby(['Country']).mean()

In [77]:
countries_5yearavg.head(5)

Unnamed: 0_level_0,tempmax,tempmin,temp,feelslikemax,feelslikemin,feelslike,dew,humidity,precipprob,precipcover,windspeed,winddir,sealevelpressure,cloudcover,visibility
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Angola,28.84,23.39,25.67,31.85,23.46,26.97,21.71,79.39,13.88,2.56,20.36,222.17,1011.0,59.0,10.03
Burundi,29.07,21.23,25.5,30.47,21.29,26.13,19.06,69.22,27.1,2.94,26.54,170.29,1012.81,64.22,11.48
Cameroon,29.33,20.34,23.87,31.46,20.34,24.38,20.87,85.35,17.95,6.13,10.72,227.62,1015.49,66.37,8.75
Ethiopia,22.47,12.45,17.08,22.4,12.28,17.05,8.33,60.75,25.95,1.7,22.73,134.34,1024.78,55.79,15.58
Kenya,25.19,15.49,19.84,24.98,15.49,19.8,13.18,68.75,22.32,1.43,26.54,94.18,1020.85,70.07,11.27


In [78]:
# Average of countries for the past 5 years
prefix = 'annualavg'
# Loop through each column and append the appropriate prefix
for i in range(len(countries_5yearavg.columns)):
    if not df.columns[i].startswith(prefix):
        countries_5yearavg.rename(columns={countries_5yearavg.columns[i]: prefix + countries_5yearavg.columns[i]}, inplace=True)


In [79]:
countries_5yearavg.head(5)

Unnamed: 0_level_0,annualavgtempmax,annualavgtempmin,annualavgtemp,annualavgfeelslikemax,annualavgfeelslikemin,annualavgfeelslike,annualavgdew,annualavghumidity,annualavgprecipprob,annualavgprecipcover,annualavgwindspeed,annualavgwinddir,annualavgsealevelpressure,annualavgcloudcover,annualavgvisibility
Country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Angola,28.84,23.39,25.67,31.85,23.46,26.97,21.71,79.39,13.88,2.56,20.36,222.17,1011.0,59.0,10.03
Burundi,29.07,21.23,25.5,30.47,21.29,26.13,19.06,69.22,27.1,2.94,26.54,170.29,1012.81,64.22,11.48
Cameroon,29.33,20.34,23.87,31.46,20.34,24.38,20.87,85.35,17.95,6.13,10.72,227.62,1015.49,66.37,8.75
Ethiopia,22.47,12.45,17.08,22.4,12.28,17.05,8.33,60.75,25.95,1.7,22.73,134.34,1024.78,55.79,15.58
Kenya,25.19,15.49,19.84,24.98,15.49,19.8,13.18,68.75,22.32,1.43,26.54,94.18,1020.85,70.07,11.27


In [80]:
# Combine merged_data and countries_5yearavg
final_df = pd.merge(merged_data, countries_5yearavg, on='Country', how='left')

In [81]:
final_df.to_csv('roads_maintenancefinal.csv',index=False)