In [1]:
# Import dependencies
from pathlib import Path

import pandas as pd

In [22]:
# Load the combined raw data from CSV and preview the first five rows
df = pd.read_csv("raw_data/raw_data_combined.csv")
df.head(n=5)

Unnamed: 0,Rounded to Sub-Rounded,Sub Rounded to Sub Angular,Sub Angular to Angular,Low Relief,Medium Relief,High Relief,Precipitation Features,Dissolution Etching,Fracture Faces,Subparallel Linear Features,...,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped,Edge Rounding,Breakage Blocks,Abrasion Features,location,set,a_b
0,0,1,0,0,1,0,0,1,0,1,...,0,0,0,1,1,0,0,ELVA,5,B
1,0,1,0,0,1,0,1,0,1,1,...,1,0,0,0,1,0,0,ELVA,5,B
2,0,0,1,0,1,0,0,1,0,1,...,0,0,0,0,0,0,0,ELVA,5,B
3,0,0,1,0,1,0,0,1,1,1,...,1,1,1,0,0,0,0,ELVA,5,B
4,0,0,1,0,1,0,0,1,0,1,...,1,0,0,1,0,0,0,ELVA,5,B


In [23]:
# Get column names: keep a reference to the frame's column Index so it can be inspected
column_names = df.columns

In [24]:
# Display the column labels in their current order
column_names

Index(['Rounded to Sub-Rounded', 'Sub Rounded to Sub Angular',
       'Sub Angular to Angular', 'Low Relief', 'Medium Relief', 'High Relief',
       'Precipitation Features', 'Dissolution Etching', 'Fracture Faces',
       'Subparallel Linear Features', 'Conchoidal Fractures', 'Curved Grooves',
       'Straight Grooves', 'Deep Troughs', 'Crescentic Gouges',
       'Arc Shaped Steps', 'Linear Steps', 'Sharp Angular Features',
       'Upturned Plates', 'V Shaped', 'Edge Rounding', 'Breakage Blocks',
       'Abrasion Features', 'location', 'set', 'a_b'],
      dtype='object')

In [25]:
# Reorder columns: the identifying columns come first, followed by the
# feature columns in the order they already appear in the raw data.
identifier_columns = ['location', 'set', 'a_b']
feature_columns = [
    'Rounded to Sub-Rounded',
    'Sub Rounded to Sub Angular',
    'Sub Angular to Angular',
    'Low Relief',
    'Medium Relief',
    'High Relief',
    'Precipitation Features',
    'Dissolution Etching',
    'Fracture Faces',
    'Subparallel Linear Features',
    'Conchoidal Fractures',
    'Curved Grooves',
    'Straight Grooves',
    'Deep Troughs',
    'Crescentic Gouges',
    'Arc Shaped Steps',
    'Linear Steps',
    'Sharp Angular Features',
    'Upturned Plates',
    'V Shaped',
    'Edge Rounding',
    'Breakage Blocks',
    'Abrasion Features',
]
new_order = identifier_columns + feature_columns

In [26]:
# Apply the new column order. `reindex` never fails on a bad label list: a misspelled
# name in `new_order` becomes a new all-NaN column and any column left out of the list
# is silently dropped. Check that the list is an exact rearrangement of the current
# columns first, so this step can only ever reorder.
mismatched = set(new_order).symmetric_difference(df.columns)
if mismatched:
    raise ValueError(
        "new_order is not a reordering of df.columns; "
        f"unmatched labels: {sorted(mismatched, key=str)}"
    )
df = df.reindex(columns=new_order)

In [27]:
# Preview the first five rows to confirm the new column order
df.head(n=5)

Unnamed: 0,location,set,a_b,Rounded to Sub-Rounded,Sub Rounded to Sub Angular,Sub Angular to Angular,Low Relief,Medium Relief,High Relief,Precipitation Features,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped,Edge Rounding,Breakage Blocks,Abrasion Features
0,ELVA,5,B,0,1,0,0,1,0,0,...,0,0,1,0,0,0,1,1,0,0
1,ELVA,5,B,0,1,0,0,1,0,1,...,0,1,0,1,0,0,0,1,0,0
2,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
3,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,1,1,1,1,0,0,0,0
4,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,1,1,0,0,1,0,0,0


In [28]:
# List the distinct values of the location column
df["location"].unique()

array(['ELVA', 'RP-16', 'AUS', 'RG', 'IGNF', 'AUS-Drift', 'LPM'],
      dtype=object)

In [29]:
# Labels for every location, one row per location:
#   location -> (location type, classification, temperature, moisture)
# The four lookup dictionaries below are all derived from this single table.
_site_labels = {
    "ELVA":      ("river",   "cold-wet",         "cold", "wet"),
    "RP-16":     ("river",   "cold-dry",         "cold", "dry"),
    "AUS":       ("river",   "cold-wet",         "cold", "wet"),
    "RG":        ("river",   "hot-wet",          "hot",  "wet"),
    "IGNF":      ("river",   "hot-dry",          "hot",  "dry"),
    "AUS-Drift": ("moraine", "cold-wet-glacial", "cold", "wet"),
    "LPM":       ("moraine", "cold-dry-glacial", "cold", "dry"),
}

# Classification of each input - this will be the ultimate model
# output for higher level models
classification = {site: labels[1] for site, labels in _site_labels.items()}

# Location type
location_type = {site: labels[0] for site, labels in _site_labels.items()}

# Model output for first logistic regression model
classification_binary_temp = {site: labels[2] for site, labels in _site_labels.items()}

# Model output for second logistic regression model
classification_binary_moisture = {site: labels[3] for site, labels in _site_labels.items()}

In [30]:
# Derive the four label columns from `location` using the lookup dictionaries.
# `Series.map` with a dict is the idiomatic lookup, but it yields NaN for values
# missing from the dict, so each lookup table is first checked against the locations
# actually present. A KeyError naming the unlabelled location(s) is raised instead,
# matching the exception type of the plain dict indexing this replaces.
label_lookups = {
    "location_type": location_type,
    "classification": classification,
    "binary_temp": classification_binary_temp,
    "binary_moisture": classification_binary_moisture,
}
for label_column, lookup in label_lookups.items():
    unlabelled = set(df["location"].unique()) - set(lookup)
    if unlabelled:
        raise KeyError(
            f"no {label_column} label for location(s): {sorted(unlabelled, key=str)}"
        )
    df[label_column] = df["location"].map(lookup)

In [31]:
# Preview the first five rows, now including the derived label columns
df.head(n=5)

Unnamed: 0,location,set,a_b,Rounded to Sub-Rounded,Sub Rounded to Sub Angular,Sub Angular to Angular,Low Relief,Medium Relief,High Relief,Precipitation Features,...,Sharp Angular Features,Upturned Plates,V Shaped,Edge Rounding,Breakage Blocks,Abrasion Features,location_type,classification,binary_temp,binary_moisture
0,ELVA,5,B,0,1,0,0,1,0,0,...,0,0,1,1,0,0,river,cold-wet,cold,wet
1,ELVA,5,B,0,1,0,0,1,0,1,...,0,0,0,1,0,0,river,cold-wet,cold,wet
2,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,0,0,0,0,river,cold-wet,cold,wet
3,ELVA,5,B,0,0,1,0,1,0,0,...,1,1,0,0,0,0,river,cold-wet,cold,wet
4,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,1,0,0,0,river,cold-wet,cold,wet


In [32]:
# Get column names again now that the label columns have been added to df,
# and display them in their current order
column_names = df.columns
column_names

Index(['location', 'set', 'a_b', 'Rounded to Sub-Rounded',
       'Sub Rounded to Sub Angular', 'Sub Angular to Angular', 'Low Relief',
       'Medium Relief', 'High Relief', 'Precipitation Features',
       'Dissolution Etching', 'Fracture Faces', 'Subparallel Linear Features',
       'Conchoidal Fractures', 'Curved Grooves', 'Straight Grooves',
       'Deep Troughs', 'Crescentic Gouges', 'Arc Shaped Steps', 'Linear Steps',
       'Sharp Angular Features', 'Upturned Plates', 'V Shaped',
       'Edge Rounding', 'Breakage Blocks', 'Abrasion Features',
       'location_type', 'classification', 'binary_temp', 'binary_moisture'],
      dtype='object')

In [36]:
# Reorder columns: identifying columns, then the derived label columns,
# then the feature columns in their existing order.
identifier_columns = ['location', 'set', 'a_b']
label_columns = ['location_type', 'classification', 'binary_temp', 'binary_moisture']
feature_columns = [
    'Rounded to Sub-Rounded',
    'Sub Rounded to Sub Angular',
    'Sub Angular to Angular',
    'Low Relief',
    'Medium Relief',
    'High Relief',
    'Precipitation Features',
    'Dissolution Etching',
    'Fracture Faces',
    'Subparallel Linear Features',
    'Conchoidal Fractures',
    'Curved Grooves',
    'Straight Grooves',
    'Deep Troughs',
    'Crescentic Gouges',
    'Arc Shaped Steps',
    'Linear Steps',
    'Sharp Angular Features',
    'Upturned Plates',
    'V Shaped',
    'Edge Rounding',
    'Breakage Blocks',
    'Abrasion Features',
]
new_order = identifier_columns + label_columns + feature_columns

In [37]:
# Apply the new column order. `reindex` never fails on a bad label list: a misspelled
# name in `new_order` becomes a new all-NaN column and any column left out of the list
# is silently dropped. Check that the list is an exact rearrangement of the current
# columns first, so this step can only ever reorder.
mismatched = set(new_order).symmetric_difference(df.columns)
if mismatched:
    raise ValueError(
        "new_order is not a reordering of df.columns; "
        f"unmatched labels: {sorted(mismatched, key=str)}"
    )
df = df.reindex(columns=new_order)

In [38]:
# Preview the first five rows in the final column order
df.head(n=5)

Unnamed: 0,location,set,a_b,location_type,classification,binary_temp,binary_moisture,Rounded to Sub-Rounded,Sub Rounded to Sub Angular,Sub Angular to Angular,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped,Edge Rounding,Breakage Blocks,Abrasion Features
0,ELVA,5,B,river,cold-wet,cold,wet,0,1,0,...,0,0,1,0,0,0,1,1,0,0
1,ELVA,5,B,river,cold-wet,cold,wet,0,1,0,...,0,1,0,1,0,0,0,1,0,0
2,ELVA,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,0,0,0,0,0,0,0
3,ELVA,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,1,1,0,0,0,0
4,ELVA,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,0,0,1,0,0,0


In [39]:
# Write the prepared frame to CSV without the row index. The output directory is
# created first because `to_csv` does not create missing parent directories and
# would otherwise fail on a fresh checkout.
output_path = Path("outputs/prepared_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

In [40]:
# List the distinct values of the location_type column
df["location_type"].unique()

array(['river', 'moraine'], dtype=object)

In [43]:
# Show the rows whose location_type is "moraine"
df.loc[df["location_type"].eq("moraine")]

Unnamed: 0,location,set,a_b,location_type,classification,binary_temp,binary_moisture,Rounded to Sub-Rounded,Sub Rounded to Sub Angular,Sub Angular to Angular,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped,Edge Rounding,Breakage Blocks,Abrasion Features
450,AUS-Drift,1,B,moraine,cold-wet-glacial,cold,wet,0,1,0,...,0,0,1,1,0,0,0,0,0,0
451,AUS-Drift,1,B,moraine,cold-wet-glacial,cold,wet,0,1,0,...,0,0,1,1,0,0,0,0,0,0
452,AUS-Drift,1,B,moraine,cold-wet-glacial,cold,wet,0,1,0,...,0,0,1,0,0,0,0,1,0,0
453,AUS-Drift,1,B,moraine,cold-wet-glacial,cold,wet,0,0,1,...,0,0,1,1,0,0,0,0,0,0
454,AUS-Drift,1,B,moraine,cold-wet-glacial,cold,wet,0,1,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
837,AUS-Drift,1,A,moraine,cold-wet-glacial,cold,wet,0,1,0,...,0,0,1,1,1,0,0,1,0,0
838,AUS-Drift,1,A,moraine,cold-wet-glacial,cold,wet,0,0,1,...,0,0,0,0,1,1,0,0,0,0
839,AUS-Drift,1,A,moraine,cold-wet-glacial,cold,wet,0,1,0,...,0,0,0,0,0,0,0,0,1,0
840,AUS-Drift,1,A,moraine,cold-wet-glacial,cold,wet,0,0,1,...,0,0,0,1,1,0,0,0,0,0
