In [22]:
import pandas as pd
import numpy as np

In [23]:
# Locations of the CSV files this notebook reads and rewrites
# (relative to the notebook's working directory).
networks_metadata_path = "../../data/networks/all/networks_metadata.csv"
parsed_rmangal_metadata_path = "../../data/networks/parsed_rmangal_networks_metadata.csv"

# Lookup from the short BioClim_<n> column codes to their descriptive names;
# used to rename the corresponding metadata columns.
bioclimate_varnames = dict(
    BioClim_1="Annual Mean Temperature",
    BioClim_2="Mean Diurnal Range (Mean of monthly (max temp - min temp))",
    BioClim_3="Isothermality (BioClim_2/BioClim_7) (×100)",
    BioClim_4="Temperature Seasonality (standard deviation ×100)",
    BioClim_5="Max Temperature of Warmest Month",
    BioClim_6="Min Temperature of Coldest Month",
    BioClim_7="Temperature Annual Range (BioClim_5-BioClim_6)",
    BioClim_8="Mean Temperature of Wettest Quarter",
    BioClim_9="Mean Temperature of Driest Quarter",
    BioClim_10="Mean Temperature of Warmest Quarter",
    BioClim_11="Mean Temperature of Coldest Quarter",
    BioClim_12="Annual Precipitation",
    BioClim_13="Precipitation of Wettest Month",
    BioClim_14="Precipitation of Driest Month",
    BioClim_15="Precipitation Seasonality (Coefficient of Variation)",
    BioClim_16="Precipitation of Wettest Quarter",
    BioClim_17="Precipitation of Driest Quarter",
    BioClim_18="Precipitation of Warmest Quarter",
    BioClim_19="Precipitation of Coldest Quarter",
)

## parse data

In [38]:
# Read the metadata table for all networks from its CSV file.
networks_metadata = pd.read_csv(filepath_or_buffer=networks_metadata_path)

In [39]:
# Give the BioClim_<n> columns their descriptive names and collapse the
# EcoRegion.x / EcoRegion.y pair down to a single "EcoRegion" column.
# The result is later written back over networks_metadata_path, so on a re-run
# "EcoRegion.y" is already gone; errors="ignore" keeps this cell re-runnable
# instead of raising KeyError (both renames are already no-ops in that case).
networks_metadata = (
    networks_metadata
    .rename(columns=bioclimate_varnames)
    .drop(columns=["EcoRegion.y"], errors="ignore")
    .rename(columns={"EcoRegion.x": "EcoRegion"})
)

In [40]:
# Overwrite the metadata CSV with the renamed columns.
# index=False: by default to_csv also writes the row index as a nameless first
# column, which read_csv then loads back as "Unnamed: 0" -- the stray columns
# this notebook has to filter out in other cells.
networks_metadata.to_csv(networks_metadata_path, index=False)

In [51]:
# Columns to discard: dotted variants of the descriptive BioClim names
# (presumably name-mangled duplicates from an upstream export -- confirm),
# plus leftover index/ID columns and species-richness columns.
drop = [
    'X',
    'Mean.Diurnal.Range..Mean.of.monthly..max.temp...min.temp..',
    'Mean.Temperature.of.Driest.Quarter',
    'Mean.Temperature.of.Warmest.Quarter',
    'Isothermality..BioClim_2.BioClim_7....100.',
    'species_richness.x',
    'Precipitation.of.Warmest.Quarter',
    'Precipitation.of.Wettest.Month',
    'Obs_ID',
    'Precipitation.of.Driest.Month',
    'Mean.Temperature.of.Coldest.Quarter',
    'Temperature.Seasonality..standard.deviation..100.',
    'Precipitation.of.Coldest.Quarter',
    'Min.Temperature.of.Coldest.Month',
    'Mean.Temperature.of.Wettest.Quarter',
    'Annual.Precipitation',
    'Annual.Mean.Temperature',
    'Max.Temperature.of.Warmest.Month',
    'Precipitation.Seasonality..Coefficient.of.Variation.',
    'Precipitation.of.Wettest.Quarter',
    'Precipitation.of.Driest.Quarter',
    'Temperature.Annual.Range..BioClim_5.BioClim_6.',
    'Unnamed: 0',
    'ID',
    'X.1',
    'species_richness',
]

# Select with a boolean mask rather than a list of labels: when column names
# are duplicated, label-based selection returns every matching column once per
# occurrence of the label, multiplying the duplicates. The mask keeps each
# surviving column exactly once and ignores names in `drop` that are absent.
networks_metadata = networks_metadata.loc[:, ~networks_metadata.columns.isin(drop)]

In [60]:
# Distinct column names, in order of first appearance.
unique_columns = networks_metadata.columns.unique()


def _coalesce_same_named(frame, name):
    """Merge all columns of `frame` called `name` into one Series.

    Values are forward-filled left to right across the same-named columns and
    the right-most column is returned, so each row ends up with its right-most
    non-missing value (or NaN when every duplicate is missing).
    """
    # Boolean mask picks each same-named column exactly once; selecting by a
    # list of repeated labels would return the duplicates multiple times.
    same_named = frame.loc[:, frame.columns == name]
    # DataFrame.ffill replaces the deprecated fillna(method='ffill').
    return same_named.ffill(axis=1).iloc[:, -1]


# Build the de-duplicated frame in one constructor call instead of inserting
# columns one at a time into an empty DataFrame.
networks_metadata_unique = pd.DataFrame(
    {name: _coalesce_same_named(networks_metadata, name) for name in unique_columns}
)

In [69]:
def _second_to_last_path_part(path):
    """Return the second-to-last "/"-separated component of `path`."""
    parts = path.split("/")
    return parts[-2]


# Derive each network's type from the second-to-last component of its processed_path.
networks_metadata_unique["network_type"] = networks_metadata_unique["processed_path"].apply(
    _second_to_last_path_part
)

In [72]:
# Overwrite the metadata CSV with the de-duplicated table.
# index=False: by default to_csv also writes the row index as a nameless first
# column, which read_csv then loads back as "Unnamed: 0" -- the stray columns
# this notebook has to filter out in other cells.
networks_metadata_unique.to_csv(networks_metadata_path, index=False)

## Merge the parsed rmangal metadata into the all-networks metadata

In [31]:
# Reload the all-networks metadata table from disk.
networks_metadata = pd.read_csv(filepath_or_buffer=networks_metadata_path)
# Load the parsed rmangal metadata that will be merged into it.
mangal_metadata = pd.read_csv(filepath_or_buffer=parsed_rmangal_metadata_path)

In [32]:
# Restrict to the rows whose source is 'mangal', then left-join the parsed
# rmangal metadata on network_index so every such row is kept even when it has
# no match.
# NOTE(review): any other column shared by both tables would come out with
# _x/_y suffixes -- confirm the two schemas only overlap on network_index.
mangal_rows = networks_metadata[networks_metadata["source"] == "mangal"]
mangal_networks_metadata = pd.merge(
    mangal_rows,
    mangal_metadata,
    how="left",
    on="network_index",
)

In [33]:
# Discard every column whose name contains "Unnamed".
unnamed_columns = [name for name in mangal_networks_metadata.columns if "Unnamed" in name]
mangal_networks_metadata = mangal_networks_metadata.drop(columns=unnamed_columns)

In [34]:
# Columns that exist only in the merged Mangal table. Collected as an ordered
# list (not a set difference) so the new columns are appended in the same
# order on every run; set iteration order over strings is not stable between
# interpreter sessions, which made the column order of the saved CSV vary.
mangal_only_columns = [
    c for c in mangal_networks_metadata.columns
    if c not in networks_metadata.columns
]
for c in mangal_only_columns:
    # Start empty; DataFrame.update below fills in the Mangal rows.
    networks_metadata[c] = np.nan

# Align both tables on (network_type, network_index) so update() can match rows.
index_keys = ["network_type", "network_index"]
networks_metadata = networks_metadata.set_index(index_keys)
mangal_networks_metadata = mangal_networks_metadata.set_index(index_keys)

# In-place overwrite of matching cells with the non-missing Mangal values.
networks_metadata.update(mangal_networks_metadata)

# Strip any "Unnamed" columns and turn the key levels back into ordinary columns.
unnamed_columns = [c for c in networks_metadata.columns if "Unnamed" in c]
networks_metadata = networks_metadata.drop(columns=unnamed_columns).reset_index()

In [35]:
# Overwrite the metadata CSV with the table enriched by the Mangal metadata.
# index=False: by default to_csv also writes the row index as a nameless first
# column, which read_csv then loads back as "Unnamed: 0" -- the stray columns
# this notebook has to filter out in other cells.
networks_metadata.to_csv(networks_metadata_path, index=False)