In [None]:
import os

import geopandas as gpd
import pandas as pd

In [None]:
# Build a lookup from the names in the 'Long List (formatted)' column to the
# hex codes in the 'HTML RGB HASH' column of the CGMW/ICS colour workbook.
colour_workbook = '../CGMW_ICS_colour_codes.xlsx'

# The column names are taken from a separate read that uses row 5 as header;
# the data read skips the first 9 rows and then has its columns overwritten.
header = pd.read_excel(colour_workbook, header=5).columns
color_sheet = pd.read_excel(colour_workbook, skiprows=9)
color_sheet.columns = header

division_names = color_sheet['Long List (formatted)']
hex_codes = color_sheet['HTML RGB HASH']
color_dict = {division: hex_code for division, hex_code in zip(division_names, hex_codes)}

# Units without an assigned division (key None) are drawn in white.
color_dict[None] = '#FFFFFF'
print(color_dict)

In [None]:
# Load every digitized sheet into `maps`; `map_names` lists the output name
# for each entry of `maps`, in the same order.
gpkg_a = './digitized_maps/SovietDigitizing_041124_A.gpkg'
gpkg_b = './digitized_maps/SovietDigitizing_041124_B.gpkg'

A_layers = [
    'Polygons-K-38-37-A',
    'Polygonz4-K-38-25-V',
    'Polygonz4-K3813',
    'Polygonz4K-38-35-G',
    'Polygonz4K-38-38-G',
]
B_layer = 'Polygonz4-K-38-V-G'

maps = []
for layer_name in A_layers:
    sheet = gpd.read_file(gpkg_a, layer=layer_name)

    # Fix upper Jurassic error in 38-38-A (misnamed K-38-37-A in original file).
    # .loc slicing is label-based and inclusive, so rows 9 and 10 are both set.
    if layer_name == 'Polygons-K-38-37-A':
        sheet.loc[9:10, 'unit'] = 'J3km+t'

    maps.append(sheet)

B = gpd.read_file(gpkg_b, layer=B_layer)

# Fix errors in B: print the current label of each row, then overwrite it.
b_corrections = {19: 'J13s1', 83: 'J2hd4'}
for row_label, corrected_unit in b_corrections.items():
    print(B.loc[row_label, 'unit'])
    B.loc[row_label, 'unit'] = corrected_unit

maps.append(B)

map_names = [
    'K-38-38-A',
    'K-38-25-V',
    'K3813',
    'K-38-35-G',
    'K-38-38-G',
    'K-38-38-V',
]


In [None]:
# Normalise the unit labels on every sheet in `maps` and collect the sorted
# array of distinct labels (`unique_units`) used by the following cells.

# Exact-match replacements applied to the 'unit' column of every sheet.
unit_corrections = {
    'J2S2':'J2s2','C21-Tch':'C12-Tch','C2-Tch':'C12-Tch','J11mr1':'Tmr1','J11mr2':'Tmr2',
    'J21mr2':'J12mr2','J21ms1':'Tms1','J21ms11':'J12ms11',
    'J21ms2':'J12ms2','J21ms21':'J12ms12','J31':'J13','J31s':'J13s1','J31s1':'J13s1',
    'Pg32':'Pg23','Pg1+Pg1+22':'Pg1+Pg21+2','Pg2(1)2':'Pg22(1)','Pg2(2+3)2':'Pg22(2+3)','N21':'N12',
    'gamma1C21-C2':'gamma1C12-C2','gamma2C21-C2':'gamma2C12-C2','pC21-C2':'pC12-C2',
}

collected_units = []
# `layer` rather than `map`, so the builtin map() is not shadowed.
for layer in maps:
    # Presumably some sheets call the column 'units' (TODO confirm); renaming in
    # place keeps the objects stored in `maps` up to date for later cells.
    layer.rename(columns={'units':'unit'},inplace=True)

    # Assign the result back to the frame. Calling replace(..., inplace=True)
    # on `layer.unit` is chained assignment: it warns on recent pandas and has
    # no effect on the frame once Copy-on-Write is enabled.
    layer['unit'] = layer['unit'].replace(unit_corrections)

    collected_units.extend(layer['unit'].unique())

# Build the Series once (explicit dtype also covers the empty case) instead of
# concatenating inside the loop.
all_units = pd.Series(collected_units, dtype=object)

unique_units = all_units.unique()
unique_units.sort()

print(unique_units)
print(len(unique_units))

In [None]:
# Assign each unit label a first-level time division ('div1') and its colour.
df = pd.DataFrame(index=unique_units)

# Code fragment -> division name. A fragment counts as present when it occurs
# anywhere in the unit label (plain substring test).
div1_dic = {'C1':'Carboniferous','C2':'Carboniferous','C3':'Carboniferous','T':'Triassic','Cr':'Cretaceous','E':'Eocene','Ice':None,'J':'Jurassic','K':'Cretaceous',
        'N':'Neogene','O':'Ordovician','S':'Silurian','Pg':'Paleogene','Q':'Quaternary','D':'Devonian'}

# Division names in the order used to pick the earliest match.
div1_order = ['Ordovician','Silurian','Devonian','Carboniferous','Triassic','Jurassic','Cretaceous','Paleogene','Eocene','Neogene','Quaternary']

for unit in unique_units:
    # Drop None entries ('Ice') so they can never reach div1_order.index(),
    # which would raise ValueError when combined with another match.
    div1 = [v for k,v in div1_dic.items() if k in unit and v is not None]

    if div1:
        # Earliest division = the match with the smallest position in div1_order.
        earliest_time = min(div1, key=div1_order.index)
    else:
        # No match: record None explicitly (as the div2 cell does) instead of
        # silently reusing the value left over from the previous unit.
        earliest_time = None

    df.loc[unit,'div1'] = earliest_time
    df.loc[unit,'div1_color'] = color_dict[earliest_time]


In [None]:
# Assign each unit label a second-level time division ('div2') and its colour.

# Code fragment -> division name; a fragment matches when it is a substring
# of the unit label.
div2_dic = {
    'C1':'Mississippian','C3':'Pennsylvanian','Cr1':'Lower Cretaceous','Cr2':'Upper Cretaceous',
    'D2':'Middle Devonian','J1':'Lower Jurassic','J2':'Middle Jurassic','J3':'Upper Jurassic',
    'K1':'Lower Cretaceous','N1':'Miocene','N2':'Pliocene','E':'Eocene','O':'Ordovician',
    'S1':'Llandovery','S2':'Wenlock','Pg1':'Paleocene','Pg2':'Eocene','Pg3':'Oligocene',
    'Q':'Quaternary','T':'Triassic',
}

# Division names in the order used to pick the earliest match.
div2_order = [
    'Ordovician','Llandovery','Wenlock','Middle Devonian','Mississippian','Pennsylvanian',
    'Triassic','Lower Jurassic','Middle Jurassic','Upper Jurassic','Lower Cretaceous',
    'Upper Cretaceous','Paleocene','Eocene','Oligocene','Miocene','Pliocene','Quaternary',
]

# Position of every division name within div2_order.
div2_rank = {division: position for position, division in enumerate(div2_order)}

for unit in unique_units:
    div2 = [division for code, division in div2_dic.items() if code in unit]

    # Keep the matched division that comes first in div2_order; None when
    # no fragment matched at all.
    earliest_time = min(div2, key=div2_rank.__getitem__) if div2 else None

    df.loc[unit,'div2'] = earliest_time
    df.loc[unit,'div2_color'] = color_dict[earliest_time]

In [None]:
# Override both colour columns for units whose label contains one of these
# markers (presumably symbols of intrusive rocks - confirm with the legend).
intrus_dict = {'beta':'red','gamma':'red','mu':'red','delta':'red','phi':'red'}

color_columns = ['div1_color','div2_color']

for unit in unique_units:
    matched_colors = [color for marker, color in intrus_dict.items() if marker in unit]
    if matched_colors:
        # When several markers match, the last one in dict order wins.
        df.loc[unit,color_columns] = matched_colors[-1]

In [None]:
# Save the per-unit table (index = unit label) to the working directory.
df.to_csv(path_or_buf='unit_info.csv')

In [None]:
# Attach the division and colour columns from `df` to every sheet and write
# each sheet to '<shape_dir><map name>.shp'.
shape_dir = 'shapefiles/'
os.makedirs(shape_dir,exist_ok=True)

# {column name: {unit label: value}} lookup built from the per-unit table.
unit_dict = df.to_dict()

# Fail before writing anything rather than with an IndexError after some
# files have already been written.
if len(map_names) < len(maps):
    raise ValueError(
        f'{len(maps)} maps loaded but only {len(map_names)} output names given'
    )

# `layer` rather than `map`, so the builtin map() is not shadowed afterwards.
for layer, map_name in zip(maps, map_names):
    # Harmless if the column was already renamed by an earlier cell.
    layer.rename(columns={'units':'unit'},inplace=True)

    for column in ('div1','div1_color','div2','div2_color'):
        layer[column] = layer['unit'].map(unit_dict[column])

    filename = map_name + '.shp'

    layer.to_file(os.path.join(shape_dir, filename))