In [180]:
# Import dependencies
from pathlib import Path

import numpy as np
import pandas as pd

In [234]:
# Load the combined raw observations and preview the first rows
raw_data_path = "raw_data/raw_data_combined.csv"
df = pd.read_csv(raw_data_path)
df.head()

Unnamed: 0,Rounded to Sub-Rounded,Sub Rounded to Sub Angular,Sub Angular to Angular,Low Relief,Medium Relief,High Relief,Precipitation Features,Dissolution Etching,Fracture Faces,Subparallel Linear Features,...,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped,Edge Rounding,Breakage Blocks,Abrasion Features,location,set,a_b
0,0,1,0,0,1,0,0,1,0,1,...,0,0,0,1,1,0,0,ELVA,5,B
1,0,1,0,0,1,0,1,0,1,1,...,1,0,0,0,1,0,0,ELVA,5,B
2,0,0,1,0,1,0,0,1,0,1,...,0,0,0,0,0,0,0,ELVA,5,B
3,0,0,1,0,1,0,0,1,1,1,...,1,1,1,0,0,0,0,ELVA,5,B
4,0,0,1,0,1,0,0,1,0,1,...,1,0,0,1,0,0,0,ELVA,5,B


In [235]:
# Capture the column labels of the freshly loaded frame so they can be
# inspected and used as the basis for the reordered column list below
column_names = df.columns

In [236]:
# Display the column labels captured from the loaded frame
column_names

Index(['Rounded to Sub-Rounded', 'Sub Rounded to Sub Angular',
       'Sub Angular to Angular', 'Low Relief', 'Medium Relief', 'High Relief',
       'Precipitation Features', 'Dissolution Etching', 'Fracture Faces',
       'Subparallel Linear Features', 'Conchoidal Fractures', 'Curved Grooves',
       'Straight Grooves', 'Deep Troughs', 'Crescentic Gouges',
       'Arc Shaped Steps', 'Linear Steps', 'Sharp Angular Features',
       'Upturned Plates', 'V Shaped', 'Edge Rounding', 'Breakage Blocks',
       'Abrasion Features', 'location', 'set', 'a_b'],
      dtype='object')

In [237]:
# Reorder columns: the three identifier columns move to the front, the
# remaining columns keep their original relative order
id_columns = ['location', 'set', 'a_b']
observation_columns = [
    'Rounded to Sub-Rounded',
    'Sub Rounded to Sub Angular',
    'Sub Angular to Angular',
    'Low Relief',
    'Medium Relief',
    'High Relief',
    'Precipitation Features',
    'Dissolution Etching',
    'Fracture Faces',
    'Subparallel Linear Features',
    'Conchoidal Fractures',
    'Curved Grooves',
    'Straight Grooves',
    'Deep Troughs',
    'Crescentic Gouges',
    'Arc Shaped Steps',
    'Linear Steps',
    'Sharp Angular Features',
    'Upturned Plates',
    'V Shaped',
    'Edge Rounding',
    'Breakage Blocks',
    'Abrasion Features',
]
new_order = id_columns + observation_columns

In [238]:
# Apply the new column order (labels + axis form of reindex)
df = df.reindex(new_order, axis="columns")

In [239]:
# Preview the first rows to check the reordered columns
df.head()

Unnamed: 0,location,set,a_b,Rounded to Sub-Rounded,Sub Rounded to Sub Angular,Sub Angular to Angular,Low Relief,Medium Relief,High Relief,Precipitation Features,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped,Edge Rounding,Breakage Blocks,Abrasion Features
0,ELVA,5,B,0,1,0,0,1,0,0,...,0,0,1,0,0,0,1,1,0,0
1,ELVA,5,B,0,1,0,0,1,0,1,...,0,1,0,1,0,0,0,1,0,0
2,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,1,0,0,0,0,0,0,0
3,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,1,1,1,1,0,0,0,0
4,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,1,1,0,0,1,0,0,0


In [240]:
# List the distinct values of the location column
df["location"].unique()

array(['ELVA', 'RP-16', 'AUS', 'RG', 'IGNF', 'AUS-Drift', 'LPM'],
      dtype=object)

In [241]:
# Labels for every location, one row per location:
#   (location type, temperature class, moisture class, combined classification)
site_labels = {
    "ELVA": ("river", "cold", "wet", "cold-wet"),
    "RP-16": ("river", "cold", "dry", "cold-dry"),
    "AUS": ("river", "cold", "wet", "cold-wet"),
    "RG": ("river", "hot", "wet", "hot-wet"),
    "IGNF": ("river", "hot", "dry", "hot-dry"),
    "AUS-Drift": ("moraine", "cold", "wet", "cold-wet-glacial"),
    "LPM": ("moraine", "cold", "dry", "cold-dry-glacial"),
}

# Location type
location_type = {site: labels[0] for site, labels in site_labels.items()}

# Model output for first logistic regression model
classification_binary_temp = {site: labels[1] for site, labels in site_labels.items()}

# Model output for second logistic regression model
classification_binary_moisture = {site: labels[2] for site, labels in site_labels.items()}

# Classification of each input - this will be the ultimate model
# output for higher level models
classification = {site: labels[3] for site, labels in site_labels.items()}

In [242]:
# Derive the four label columns from the location of each row. Looking a
# location up with __getitem__ raises KeyError if it is missing from a table.
label_tables = {
    "location_type": location_type,
    "classification": classification,
    "binary_temp": classification_binary_temp,
    "binary_moisture": classification_binary_moisture,
}
for label_column, table in label_tables.items():
    df[label_column] = df["location"].apply(table.__getitem__)

In [243]:
# Preview the first rows, now including the label columns at the end
df.head()

Unnamed: 0,location,set,a_b,Rounded to Sub-Rounded,Sub Rounded to Sub Angular,Sub Angular to Angular,Low Relief,Medium Relief,High Relief,Precipitation Features,...,Sharp Angular Features,Upturned Plates,V Shaped,Edge Rounding,Breakage Blocks,Abrasion Features,location_type,classification,binary_temp,binary_moisture
0,ELVA,5,B,0,1,0,0,1,0,0,...,0,0,1,1,0,0,river,cold-wet,cold,wet
1,ELVA,5,B,0,1,0,0,1,0,1,...,0,0,0,1,0,0,river,cold-wet,cold,wet
2,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,0,0,0,0,river,cold-wet,cold,wet
3,ELVA,5,B,0,0,1,0,1,0,0,...,1,1,0,0,0,0,river,cold-wet,cold,wet
4,ELVA,5,B,0,0,1,0,1,0,0,...,0,0,1,0,0,0,river,cold-wet,cold,wet


In [244]:
# Get column names again; the four label columns now sit at the end
column_names = df.columns
column_names

Index(['location', 'set', 'a_b', 'Rounded to Sub-Rounded',
       'Sub Rounded to Sub Angular', 'Sub Angular to Angular', 'Low Relief',
       'Medium Relief', 'High Relief', 'Precipitation Features',
       'Dissolution Etching', 'Fracture Faces', 'Subparallel Linear Features',
       'Conchoidal Fractures', 'Curved Grooves', 'Straight Grooves',
       'Deep Troughs', 'Crescentic Gouges', 'Arc Shaped Steps', 'Linear Steps',
       'Sharp Angular Features', 'Upturned Plates', 'V Shaped',
       'Edge Rounding', 'Breakage Blocks', 'Abrasion Features',
       'location_type', 'classification', 'binary_temp', 'binary_moisture'],
      dtype='object')

In [245]:
# Reorder columns: identifiers first, then the derived label columns, then
# the remaining columns in their existing order
id_columns = ['location', 'set', 'a_b']
label_columns = ['location_type', 'classification', 'binary_temp', 'binary_moisture']
observation_columns = [
    'Rounded to Sub-Rounded',
    'Sub Rounded to Sub Angular',
    'Sub Angular to Angular',
    'Low Relief',
    'Medium Relief',
    'High Relief',
    'Precipitation Features',
    'Dissolution Etching',
    'Fracture Faces',
    'Subparallel Linear Features',
    'Conchoidal Fractures',
    'Curved Grooves',
    'Straight Grooves',
    'Deep Troughs',
    'Crescentic Gouges',
    'Arc Shaped Steps',
    'Linear Steps',
    'Sharp Angular Features',
    'Upturned Plates',
    'V Shaped',
    'Edge Rounding',
    'Breakage Blocks',
    'Abrasion Features',
]
new_order = id_columns + label_columns + observation_columns

In [246]:
# Apply the new column order (labels + axis form of reindex)
df = df.reindex(new_order, axis="columns")

In [247]:
# List the current column names before some of them are renamed for
# consistency (this cell only captures and displays the names)
column_names = df.columns
column_names

Index(['location', 'set', 'a_b', 'location_type', 'classification',
       'binary_temp', 'binary_moisture', 'Rounded to Sub-Rounded',
       'Sub Rounded to Sub Angular', 'Sub Angular to Angular', 'Low Relief',
       'Medium Relief', 'High Relief', 'Precipitation Features',
       'Dissolution Etching', 'Fracture Faces', 'Subparallel Linear Features',
       'Conchoidal Fractures', 'Curved Grooves', 'Straight Grooves',
       'Deep Troughs', 'Crescentic Gouges', 'Arc Shaped Steps', 'Linear Steps',
       'Sharp Angular Features', 'Upturned Plates', 'V Shaped',
       'Edge Rounding', 'Breakage Blocks', 'Abrasion Features'],
      dtype='object')

In [248]:
# Rename three columns: hyphenate the two roundness labels to match
# 'Rounded to Sub-Rounded' and extend 'V Shaped' to 'V Shaped Cracks'
column_renames = {
    'Sub Rounded to Sub Angular': 'Sub-Rounded to Sub-Angular',
    'Sub Angular to Angular': 'Sub-Angular to Angular',
    'V Shaped': 'V Shaped Cracks',
}
df = df.rename(columns=column_renames)

In [249]:
# Preview the first rows to check the renamed columns
df.head()

Unnamed: 0,location,set,a_b,location_type,classification,binary_temp,binary_moisture,Rounded to Sub-Rounded,Sub-Rounded to Sub-Angular,Sub-Angular to Angular,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped Cracks,Edge Rounding,Breakage Blocks,Abrasion Features
0,ELVA,5,B,river,cold-wet,cold,wet,0,1,0,...,0,0,1,0,0,0,1,1,0,0
1,ELVA,5,B,river,cold-wet,cold,wet,0,1,0,...,0,1,0,1,0,0,0,1,0,0
2,ELVA,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,0,0,0,0,0,0,0
3,ELVA,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,1,1,0,0,0,0
4,ELVA,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,0,0,1,0,0,0


In [250]:
# Combine Norway river locations; need to reorder the transect sampling locations.
# Each mapping goes from the original set number to its new position: the AUS
# sets take 1-3 and the ELVA sets continue the numbering with 4-7.
norway_aus = {old_set: new_set for new_set, old_set in enumerate((1, 3, 7), start=1)}

norway_elva = {old_set: new_set for new_set, old_set in enumerate((1, 5, 7, 11), start=4)}

In [251]:
# Renumber the transect sets of the ELVA and AUS rows in one pass over the
# location/set columns. This replaces two row-wise DataFrame.apply(axis=1)
# calls, each of which built a Series for every row of the frame.
# Rows of other locations keep their set number; a set number that is missing
# from the ELVA/AUS mapping still raises KeyError, as before.
set_renumbering = {"ELVA": norway_elva, "AUS": norway_aus}
df["set"] = [
    set_renumbering[site][transect] if site in set_renumbering else transect
    for site, transect in zip(df["location"], df["set"])
]

In [252]:
# Rename ELVA and AUS to NOR. Done as a vectorised column operation: rows whose
# location is AUS or ELVA are replaced by "NOR", every other value is kept.
df["location"] = df["location"].mask(df["location"].isin(["AUS", "ELVA"]), "NOR")

In [253]:
# Check to make sure ELVA and AUS are not in df (an empty frame is expected)
df[df["location"].isin(["ELVA", "AUS"])]

Unnamed: 0,location,set,a_b,location_type,classification,binary_temp,binary_moisture,Rounded to Sub-Rounded,Sub-Rounded to Sub-Angular,Sub-Angular to Angular,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped Cracks,Edge Rounding,Breakage Blocks,Abrasion Features


In [254]:
# Check the set numbers present on the rows whose location is NOR
df.loc[df["location"] == "NOR", "set"].unique()

array([5, 3, 7, 6, 1, 4, 2])

In [255]:
# Preview the first rows after the location and set changes
df.head()

Unnamed: 0,location,set,a_b,location_type,classification,binary_temp,binary_moisture,Rounded to Sub-Rounded,Sub-Rounded to Sub-Angular,Sub-Angular to Angular,...,Deep Troughs,Crescentic Gouges,Arc Shaped Steps,Linear Steps,Sharp Angular Features,Upturned Plates,V Shaped Cracks,Edge Rounding,Breakage Blocks,Abrasion Features
0,NOR,5,B,river,cold-wet,cold,wet,0,1,0,...,0,0,1,0,0,0,1,1,0,0
1,NOR,5,B,river,cold-wet,cold,wet,0,1,0,...,0,1,0,1,0,0,0,1,0,0
2,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,0,0,0,0,0,0,0
3,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,1,1,0,0,0,0
4,NOR,5,B,river,cold-wet,cold,wet,0,0,1,...,0,0,1,1,0,0,1,0,0,0


In [256]:
# List the distinct values of the location column after the merge into NOR
df["location"].unique()

array(['NOR', 'RP-16', 'RG', 'IGNF', 'AUS-Drift', 'LPM'], dtype=object)

In [257]:
# Write the prepared table without the index column. The output folder is
# created first so the export also works where "outputs/" does not exist yet
# (to_csv does not create missing directories).
output_path = Path("outputs/prepared_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)