### Loading the required packages

In [1]:
import warnings
import sys
import os
import csv
import time
import pandas as pd
import numpy as np
from numpy.random import randn

from sklearn import preprocessing
from scipy import stats
from scipy.stats import anderson
from scipy.stats import normaltest
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
from statsmodels.graphics.factorplots import interaction_plot
from statsmodels.graphics.gofplots import qqplot

import seaborn as sns
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

import plotly.express as px
warnings.filterwarnings('ignore')

### Preview of the dataset

In [2]:
df = pd.read_csv('nebraska_deep.csv')
df.head()

Unnamed: 0,year,structureNumber,latitude,longitude,toll,owner,yearBuilt,averageDailyTraffic,designLoad,skew,...,baseDifferenceScore,precipitation,snowfall,freezethaw,deckDeteriorationScore,subDeteriorationScore,supDeteriorationScore,deckNumberIntervention,subNumberIntervention,supNumberIntervention
0,1992,C000100305,0,0,3,2,1935,30,0,0,...,0.230199,2.02,,,-0.25,-0.25,0.0,0.0,0.0,0.0
1,1992,C000100305P,0,0,3,2,1935,20,0,0,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
2,1992,C000100405,0,0,3,2,1925,65,0,40,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
3,1992,C000100505P,0,0,3,2,1974,60,4,30,...,,,,,0.0,-0.333333,0.0,1.0,0.0,0.0
4,1992,C000100905,0,0,3,2,1962,170,2,0,...,,,,,0.0,-1.125,0.0,1.0,1.0,1.0


### Understanding various categories of YB


In [13]:
print(df.groupby(['bridgeRoadwayWidthCurbToCurb'])['bridgeRoadwayWidthCurbToCurb'].count().reset_index(name='counts'))
df['BRwidth_category'] = pd.cut(df['bridgeRoadwayWidthCurbToCurb'],
                            [1905, 1946, 1987, 2020],
                            labels=['ancient', 'medieval', 'modern' ])
# Remove duplicates
df.drop_duplicates(subset=['structureNumber'], keep='last', inplace=True)

# Return total count of bridges wrt YB categories
df.groupby(['BRwidth_category'])['BRwidth_category'].count().reset_index(name='counts')

KeyError: 'bridgeRoadwayWidthCurbToCurb'

### Number of Bridges for each YB category 

In [12]:
yb_count = df.groupby(['yb_category'])['yb_category'].count().reset_index(name='counts')
yb_sum = df.groupby(['yb_category'])['yb_category'].sum().reset_index(name='sums')
fig = px.bar(yb_count, x='yb_category', y='counts')
fig.show()

TypeError: category type does not support sum operations

### Understanding the relationship between YB and intervention

In [5]:

df_grouped = df.groupby(['yb_category', 'subNumberIntervention'])['yb_category'].count().reset_index(name='count')

# Initialize list
ultra_light = []
very_light = []
light = []
moderate = []
high = []

# Identify unique groups,(Note: must be used with a dictionary to ensure positions are consitent)
unique_interventions = df_grouped['subNumberIntervention'].unique()

# Iterate through the dataframe
for ind in df_grouped.index:
    cat, numInt, total = df_grouped['yb_category'][ind], \
                          df_grouped['subNumberIntervention'][ind], \
                          df_grouped['count'][ind]
                
    if cat == 'ultra light':
        ultra_light.append(total)
    elif cat == 'very light':
        very_light.append(total)
    elif cat == 'light':
        light.append(total)
    elif cat == 'moderate':
        moderate.append(total)
    else:
        high.append(total)

### Percentage of bridges requiring substructure repair based on the ADT category

In [10]:
df_grouped['subNumberIntervention']=df_grouped['subNumberIntervention'].replace([0.0,1.0,2.0,3.0], ['Zero','One','Two', 'Three'])
df_grouped['Percentage'] = 100 * df_grouped['count'] / df_grouped.groupby('yb_category')['count'].transform('sum')
df_grouped['Percentage'] = [round(item, 2) for item in df_grouped['Percentage']]
fig=px.bar(df_grouped,x='subNumberIntervention',y='Percentage',color=  
    'yb_category', barmode='group',
     text=df_grouped['Percentage'])
fig.show()

1) Some bridges are missing from the adt_category == 'ultra light'. The missing bridges have null has the number of intervnetions. Similarly, there are missing bridges with repsect to other categories.
2) From the above figure we can observe that the substructure intervention of percentage of bridges with ultra light traffic are increasing whereas the bridges with high traffic are decreasing.