In [89]:
# Import dependencies
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.stats.outliers_influence import variance_inflation_factor


In [74]:
# Import data
# Read the prepared dataset from a path relative to the notebook's working
# directory, then preview the first rows to sanity-check the load.
df = pd.read_csv("working_data/prepared_data.csv")
df.head()

Unnamed: 0,location,set,a_b,location_type,classification,binary_temp,binary_moisture,Rounded to Sub-Rounded,Sub-Rounded to Sub-Angular,Sub-Angular to Angular,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped Cracks,Edge Rounding,Breakage Blocks,Abrasion Features
0,NOR,5,B,river,cold-wet,cold,wet,0,1,0,...,0,0,1,0,0,0,1,1,0,0
1,NOR,5,B,river,cold-wet,cold,wet,0,1,0,...,0,1,0,1,0,0,0,1,0,0
2,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,0,0,0,0,0,0,0
3,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,1,1,0,0,0,0
4,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,0,0,1,0,0,0


In [75]:
# Names of the feature columns that the cells below total up.
# Order matters only for readability; membership is what later cells test.
features = [
    'Rounded to Sub-Rounded',
    'Sub-Rounded to Sub-Angular',
    'Sub-Angular to Angular',
    'Low Relief',
    'Medium Relief',
    'High Relief',
    'Precipitation Features',
    'Dissolution Etching',
    'Fracture Faces',
    'Subparallel Linear Features',
    'Conchoidal Fractures',
    'Curved Grooves',
    'Straight Grooves',
    'Deep Troughs',
    'Crescentic Gouges',
    'Arc Shaped Steps',
    'Linear Steps',
    'Sharp Angular Features',
    'Upturned Plates',
    'V Shaped Cracks',
    'Edge Rounding',
    'Breakage Blocks',
    'Abrasion Features',
]

In [76]:
# Distinct values found in the "location" column.
df["location"].unique()

array(['NOR', 'RP-16', 'RG', 'IGNF', 'AUS-Drift', 'LPM'], dtype=object)

In [77]:
# Keep only the rows whose location is "NOR", then display the subset.
is_nor = df["location"] == "NOR"
df_nor = df.loc[is_nor]
df_nor

Unnamed: 0,location,set,a_b,location_type,classification,binary_temp,binary_moisture,Rounded to Sub-Rounded,Sub-Rounded to Sub-Angular,Sub-Angular to Angular,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped Cracks,Edge Rounding,Breakage Blocks,Abrasion Features
0,NOR,5,B,river,cold-wet,cold,wet,0,1,0,...,0,0,1,0,0,0,1,1,0,0
1,NOR,5,B,river,cold-wet,cold,wet,0,1,0,...,0,1,0,1,0,0,0,1,0,0
2,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,0,0,0,0,0,0,0
3,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,1,1,0,0,0,0
4,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,0,0,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1186,NOR,5,A,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,0,0,0,0,0,0
1187,NOR,5,A,river,cold-wet,cold,wet,0,1,0,...,0,1,0,1,0,1,1,0,0,0
1188,NOR,5,A,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,0,0,0,0,0,0
1189,NOR,5,A,river,cold-wet,cold,wet,0,0,1,...,0,0,0,1,0,1,0,0,0,0


In [78]:
# Summary statistics (count, mean, std, quartiles) for the NOR subset.
df_nor.describe()

Unnamed: 0,set,Rounded to Sub-Rounded,Sub-Rounded to Sub-Angular,Sub-Angular to Angular,Low Relief,Medium Relief,High Relief,Precipitation Features,Dissolution Etching,Fracture Faces,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped Cracks,Edge Rounding,Breakage Blocks,Abrasion Features
count,346.0,346.0,346.0,346.0,346.0,346.0,346.0,346.0,346.0,346.0,...,346.0,346.0,346.0,346.0,346.0,346.0,346.0,346.0,346.0,346.0
mean,4.011561,0.00578,0.508671,0.485549,0.023121,0.540462,0.436416,0.367052,0.815029,0.115607,...,0.017341,0.15896,0.540462,0.65896,0.121387,0.257225,0.33815,0.34104,0.075145,0.112717
std,2.008643,0.075918,0.500649,0.500515,0.150507,0.499082,0.496659,0.482699,0.388836,0.320216,...,0.130728,0.366168,0.499082,0.474745,0.32705,0.437738,0.473765,0.474745,0.264006,0.316704
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,6.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0,0.0,...,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,0.0
max,7.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [79]:
# Column labels of the NOR subset as a plain Python list, in frame order.
columns = df_nor.columns.tolist()

In [80]:
# Print, for every feature column, the total over the NOR rows.
# The frame's own column order is kept, but the sum is computed only for
# feature columns: reducing the non-feature text columns (location,
# classification, ...) is wasted work and can fail on mixed/missing values.
for c in columns:
    if c in features:
        print(f'{c}: {df_nor[c].sum()}')

Rounded to Sub-Rounded: 2
Sub-Rounded to Sub-Angular: 176
Sub-Angular to Angular: 168
Low Relief: 8
Medium Relief: 187
High Relief: 151
Precipitation Features: 127
Dissolution Etching: 282
Fracture Faces: 40
Subparallel Linear Features: 236
Conchoidal Fractures: 264
Curved Grooves: 16
Straight Grooves: 7
Deep Troughs: 6
Crescentic Gouges: 55
Arc Shaped Steps: 187
Linear Steps: 228
Sharp Angular Features: 42
Upturned Plates: 89
V Shaped Cracks: 117
Edge Rounding: 118
Breakage Blocks: 26
Abrasion Features: 39


In [81]:
# Collect the NOR totals into one mapping: the row count under "total",
# followed by the per-feature sums in the frame's column order.
# Only feature columns are summed; the original summed every column
# (including text columns) and then discarded the non-feature results.
nor_dict = {"total": len(df_nor)}
nor_dict.update({c: df_nor[c].sum() for c in columns if c in features})

In [82]:
# Display the collected totals for a quick visual check.
nor_dict

{'total': 346,
 'Rounded to Sub-Rounded': 2,
 'Sub-Rounded to Sub-Angular': 176,
 'Sub-Angular to Angular': 168,
 'Low Relief': 8,
 'Medium Relief': 187,
 'High Relief': 151,
 'Precipitation Features': 127,
 'Dissolution Etching': 282,
 'Fracture Faces': 40,
 'Subparallel Linear Features': 236,
 'Conchoidal Fractures': 264,
 'Curved Grooves': 16,
 'Straight Grooves': 7,
 'Deep Troughs': 6,
 'Crescentic Gouges': 55,
 'Arc Shaped Steps': 187,
 'Linear Steps': 228,
 'Sharp Angular Features': 42,
 'Upturned Plates': 89,
 'V Shaped Cracks': 117,
 'Edge Rounding': 118,
 'Breakage Blocks': 26,
 'Abrasion Features': 39}

In [95]:
# Turn the totals mapping into a two-column frame for plotting:
# dict keys become the "Feature" column, dict values the "Count" column.
nor_bar = (
    pd.DataFrame.from_dict(nor_dict, orient='index')
    .reset_index()
    .rename(columns={"index": "Feature", 0: "Count"})
)

In [96]:
# Share of NOR rows for each entry, as a fraction rounded to 3 decimals.
# The denominator comes from the recorded row total instead of a hard-coded
# 346, so the column stays correct if the data or the location filter changes.
nor_bar["Percent"] = (nor_bar["Count"] / nor_dict["total"]).round(3)

In [97]:
# Display the plotting frame (Feature, Count, Percent).
nor_bar

Unnamed: 0,Feature,Count,Percent
0,total,346,1.0
1,Rounded to Sub-Rounded,2,0.006
2,Sub-Rounded to Sub-Angular,176,0.509
3,Sub-Angular to Angular,168,0.486
4,Low Relief,8,0.023
5,Medium Relief,187,0.54
6,High Relief,151,0.436
7,Precipitation Features,127,0.367
8,Dissolution Etching,282,0.815
9,Fracture Faces,40,0.116


In [98]:
# Horizontal bar chart of the raw counts, one bar per entry in nor_bar.
fig = px.bar(
    data_frame=nor_bar,
    x="Count",
    y='Feature',
    text='Count',
    title="Count of Features: Norway",
)
# Put the value labels outside the bars and let them overflow the plot area
# so the longest bar's label is not clipped.
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
)
fig.show()

In [99]:
# Horizontal bar chart of each entry's share of the NOR rows.
# Fixes the "Occurrene" typo in the title.
fig = px.bar(nor_bar, y='Feature', x="Percent", text="Percent",
            title="Percent Occurrence of Features: Norway")
# The "Percent" column holds fractions (0-1); render labels and axis ticks as
# percentages so the chart matches its title. Label placement mirrors the
# count chart (outside the bar, not clipped at the axis).
fig.update_traces(texttemplate="%{x:.1%}", textfont_size=12, textangle=0,
                  textposition="outside", cliponaxis=False)
fig.update_xaxes(tickformat=".0%")
fig.show()