In [1]:
import pandas as pd
import numpy as np

In [2]:
# 1
df = pd.read_csv('griselbridge.csv', index_col=0)
df['material'].replace(to_replace=np.nan, value='unknown', inplace=True)
print(df)

         material  age  length  mosscover covered cursed waterbody  guardian
bridgeid                                                                    
1           stone  2.5     4.5        0.8       n      y     river     troll
2           stone  3.8     3.1        0.5       n      y     marsh       NaN
3         unknown  2.1     4.4        0.6       n      n     marsh     troll
4            wood  1.2     2.2        0.6       n      n     creek  squirrel
5           brick  2.6     3.8        0.9       n      n     river     troll
...           ...  ...     ...        ...     ...    ...       ...       ...
929         brick  1.6     1.9        0.7       n      y     marsh  squirrel
930         stone  3.7     4.2        0.5       n      y     marsh  squirrel
931         brick  2.0     2.7        0.4       n      n     river      none
932          wood  1.7     2.5        0.9       n      n     river  squirrel
933         stone  4.4     3.9        0.6       n      y     marsh      ogre

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['material'].replace(to_replace=np.nan, value='unknown', inplace=True)


In [3]:
# 2
df_materials = df[['material', 'length']].groupby('material').agg(['mean', 'median'])
print(df_materials)
materials = ['brick', 'stone', 'wood', 'unknown']
for mat in materials:
    avg_length = df_materials.loc[mat][0]
    median_length = df_materials.loc[mat][1]
    print(f"The average length of a {mat} bridge is {avg_length:.3f} and the median length is {median_length:.3f}")

print()
print(df[['age', 'cursed', 'mosscover']].groupby('cursed').agg('mean'))
print('Cursed bridges tend to be older and more mosscovered.')

            length       
              mean median
material                 
brick     2.906173    2.8
stone     3.041315    3.1
unknown   4.080000    4.2
wood      2.948851    2.9
The average length of a brick bridge is 2.906 and the median length is 2.800
The average length of a stone bridge is 3.041 and the median length is 3.100
The average length of a wood bridge is 2.949 and the median length is 2.900
The average length of a unknown bridge is 4.080 and the median length is 4.200

             age  mosscover
cursed                     
n       2.117427   0.488581
y       2.434921   0.728617
Cursed bridges tend to be older and more mosscovered.


  avg_length = df_materials.loc[mat][0]
  median_length = df_materials.loc[mat][1]


In [4]:
# 3
df['mossqty'] = df['mosscover'] * df['length']
df['mossrate'] = df['mossqty'] / df['age']
print(df[['mosscover', 'length', 'age', 'mossqty', 'mossrate']])
print(df[['material', 'mossrate']].groupby('material').agg(['mean']))
for mat in materials:
    print(f"Moss grows at a rate of {df[['material', 'mossrate']].groupby('material').agg(['mean']).loc[mat][0]:.3f} on bridges of {mat}")
print("Material with the fastest growth rate: wood")
print("Material with the slowest growth rate: brick\n")
print(df[['covered', 'mossrate']].groupby('covered').agg('mean'))
print("Covered bridges encourage slower moss growth")



          mosscover  length  age  mossqty  mossrate
bridgeid                                           
1               0.8     4.5  2.5     3.60  1.440000
2               0.5     3.1  3.8     1.55  0.407895
3               0.6     4.4  2.1     2.64  1.257143
4               0.6     2.2  1.2     1.32  1.100000
5               0.9     3.8  2.6     3.42  1.315385
...             ...     ...  ...      ...       ...
929             0.7     1.9  1.6     1.33  0.831250
930             0.5     4.2  3.7     2.10  0.567568
931             0.4     2.7  2.0     1.08  0.540000
932             0.9     2.5  1.7     2.25  1.323529
933             0.6     3.9  4.4     2.34  0.531818

[933 rows x 5 columns]
          mossrate
              mean
material          
brick     0.857133
stone     0.890560
unknown   0.984254
wood      1.403203
Moss grows at a rate of 0.857 on bridges of brick
Moss grows at a rate of 0.891 on bridges of stone
Moss grows at a rate of 1.403 on bridges of wood
Moss grows at a ra

  print(f"Moss grows at a rate of {df[['material', 'mossrate']].groupby('material').agg(['mean']).loc[mat][0]:.3f} on bridges of {mat}")
  print(f"Moss grows at a rate of {df[['material', 'mossrate']].groupby('material').agg(['mean']).loc[mat][0]:.3f} on bridges of {mat}")
  print(f"Moss grows at a rate of {df[['material', 'mossrate']].groupby('material').agg(['mean']).loc[mat][0]:.3f} on bridges of {mat}")
  print(f"Moss grows at a rate of {df[['material', 'mossrate']].groupby('material').agg(['mean']).loc[mat][0]:.3f} on bridges of {mat}")


In [5]:
# 4
df['guardian'].dropna(inplace=True)
df['flow'] = np.where((df['waterbody'] == 'river') | (df['waterbody'] == 'creek'), 'y', 'n')

conditions = [
    df['guardian'] == 'ogre',
    df['guardian'] == 'troll',
    df['guardian'] == 'squirrel',
    df['guardian'] == True,
]

values = [
    0.1, 0.2, 0.01, 0
]

df['threatlvl'] = np.select(conditions, values)
print(df[['waterbody', 'flow', 'guardian', 'threatlvl']])

         waterbody flow  guardian  threatlvl
bridgeid                                    
1            river    y     troll       0.20
2            marsh    n       NaN       0.00
3            marsh    n     troll       0.20
4            creek    y  squirrel       0.01
5            river    y     troll       0.20
...            ...  ...       ...        ...
929          marsh    n  squirrel       0.01
930          marsh    n  squirrel       0.01
931          river    y      none       0.00
932          river    y  squirrel       0.01
933          marsh    n      ogre       0.10

[933 rows x 4 columns]


In [6]:
# 5
print(df[['flow', 'threatlvl']].groupby('flow').agg('mean'))
print('Bridges over flowing water are less dangerous on average')
print()
print(df[['material', 'threatlvl']].groupby('material').agg('mean'))
print('Stone bridges are the most dangerous bridges')
print()
print(df[['material', 'flow', 'threatlvl']].groupby(['material', 'flow']).agg('mean'))
print('Brick bridges with no flow yield the highest threat level')

      threatlvl
flow           
n      0.117584
y      0.099262
Bridges over flowing water are less dangerous on average

          threatlvl
material           
brick      0.099660
stone      0.110164
unknown    0.104000
wood       0.099886
Stone bridges are the most dangerous bridges

               threatlvl
material flow           
brick    n      0.126667
         y      0.089274
stone    n      0.112397
         y      0.109283
unknown  n      0.200000
         y      0.080000
wood     n      0.112807
         y      0.093697
Brick bridges with no flow yield the highest threat level


In [7]:
# 6
print(df[(df['age'] > 1) & ((df['material'] == 'stone') | (df['material'] == 'brick')) & (df['length'] >= 4.2)])
print(df[(df['age'] > 1) & ((df['material'] == 'stone') | (df['material'] == 'brick')) & (df['length'] >= 4.2)].count())

         material  age  length  mosscover covered cursed waterbody  guardian  \
bridgeid                                                                       
1           stone  2.5     4.5        0.8       n      y     river     troll   
8           brick  3.1     4.3        0.8       n      n     marsh      ogre   
17          stone  2.9     4.3        1.0       n      n     river     troll   
21          stone  3.3     4.3        0.9       n      n     marsh  squirrel   
25          stone  1.5     4.2        0.8       n      n     river     troll   
...           ...  ...     ...        ...     ...    ...       ...       ...   
906         brick  2.9     4.2        0.8       n      y     marsh     troll   
911         stone  3.6     4.3        0.4       n      n     river      ogre   
912         stone  3.8     4.2        0.4       n      n     river      ogre   
914         brick  1.5     4.3        0.6       n      n     river     troll   
930         stone  3.7     4.2        0.

In [8]:
# 7
dfa = df.dropna()
print(dfa)
print(dfa.count())
print('The new dataframe has a count of 907')

         material  age  length  mosscover covered cursed waterbody  guardian  \
bridgeid                                                                       
1           stone  2.5     4.5        0.8       n      y     river     troll   
3         unknown  2.1     4.4        0.6       n      n     marsh     troll   
4            wood  1.2     2.2        0.6       n      n     creek  squirrel   
5           brick  2.6     3.8        0.9       n      n     river     troll   
6           stone  2.4     2.4        0.8       y      n     river  squirrel   
...           ...  ...     ...        ...     ...    ...       ...       ...   
929         brick  1.6     1.9        0.7       n      y     marsh  squirrel   
930         stone  3.7     4.2        0.5       n      y     marsh  squirrel   
931         brick  2.0     2.7        0.4       n      n     river      none   
932          wood  1.7     2.5        0.9       n      n     river  squirrel   
933         stone  4.4     3.9        0.

In [27]:
# 8
bridge_a = dfa[(dfa['material'] == 'wood') & (dfa['waterbody'] == 'river')][['material', 'waterbody', 'threatlvl']].groupby(['material', 'waterbody']).agg('mean')
bridge_b = dfa[(dfa['material'] == 'brick') & (dfa['waterbody'] == 'creek')][['material', 'waterbody', 'threatlvl']].groupby(['material', 'waterbody']).agg('mean')
bridge_c = dfa[(dfa['material'] == 'stone') & (dfa['waterbody'] == 'marsh')][['material', 'waterbody', 'threatlvl']].groupby(['material', 'waterbody']).agg('mean')

print('Bridge A:\n', bridge_a)
print('Bridge B:\n', bridge_b)
print('Bridge C:\n', bridge_c)
print("\nI would prefer to cross Bridge A, as it has the lowest threat level of 10.05%, compared to B's 10.93% and C's 11.62%")

Bridge A:
                     threatlvl
material waterbody           
wood     river       0.100471
Bridge B:
                     threatlvl
material waterbody           
brick    creek       0.109344
Bridge C:
                     threatlvl
material waterbody           
stone    marsh       0.116207

I would prefer to cross Bridge A, as it has the lowest threat level of 10.05%, compared to B's 10.93% and C's 11.62%


In [36]:
# 9
mean_moss = dfa['mosscover'].agg('mean')
std_moss = dfa['mosscover'].agg('std')

conditions = [
    dfa['mosscover'] < mean_moss - std_moss,
    dfa['mosscover'] <= mean_moss + std_moss,
    dfa['mosscover'] > mean_moss + std_moss
]

values = ['low', 'average', 'high']

dfa.loc[:, 'mosscat'] = np.select(conditions, values)

bridges = [
    {
        "bridge": "D",
        "material": "stone",
        "mosscover": .67,
        "waterbody": "marsh"
     },
     {
        "bridge": "E",
        "material": "brick",
        "mosscover": .45,
        "waterbody": "river"
     },
     {
        "bridge": "F",
        "material": "brick",
        "mosscover": .60,
        "waterbody": "creek"
     },
     {
        "bridge": "G",
        "material": "stone",
        "mosscover": .15,
        "waterbody": "creek"
     },
     {
        "bridge": "H",
        "material": "wood",
        "mosscover": .88,
        "waterbody": "river"
     }
]

results =[]
for bridge in bridges:
    bridge_name = bridge['bridge']
    material = bridge['material']
    mosscover = bridge['mosscover']
    waterbody = bridge['waterbody']

    if mosscover < mean_moss - std_moss:
        mosscat = 'low'
    elif mosscover <= mean_moss + std_moss:
        mosscat = 'average'
    else:
        mosscat = 'high'

    threat_level = dfa[(dfa['material'] == material) & (dfa['mosscat'] == mosscat) & (dfa['waterbody'] == waterbody)][['material', 'mosscat', 'waterbody', 'threatlvl']].groupby(['material', 'mosscat', 'waterbody']).agg('mean').values[0][0]
    threat_level = np.round(threat_level, 4)

    results.append({
        'Bridge': bridge_name,
        'Material': material,
        'Mosscat': mosscat,
        'Waterbody': waterbody,
        'ThreatLevel': threat_level
    })

result_df = pd.DataFrame(results).set_index('Bridge')
print(result_df)
print('\n You should cross bridge G because it has the lowest probability of being attacked, of 8.68%')

       Material  Mosscat Waterbody  ThreatLevel
Bridge                                         
D         stone  average     marsh       0.1227
E         brick  average     river       0.0868
F         brick  average     creek       0.1237
G         stone      low     creek       0.0600
H          wood     high     river       0.1281

 You should cross bridge G because it has the lowest probability of being attacked, of 8.68%
