## **Preprocess** uConnect

**Required Imports**

In [None]:
import sys

import geopandas as gpd
import numpy as np
import pandas as pd
import shapely

sys.path.append('../../')
import src

**Degree Sequences**

In [3]:
# Both sheets are needed from each workbook. Passing a list as `sheet_name`
# makes pandas open and parse each workbook once and return a dict keyed by
# sheet name, instead of re-reading the same file for every sheet.
# header=None keeps the first spreadsheet row as data rather than column labels.
degree_sheet_names = ["numpars1", "totalmen"]

mencommreg_sheets = pd.read_excel(
    "../../data/uconnect/raw/UConnect MSM Tabulate Results.xlsx",
    sheet_name=degree_sheet_names,
    header=None,
)
num_bmsm_sheets = pd.read_excel(
    "../../data/uconnect/raw/UConnect Tabulate Results(1241).xlsx",
    sheet_name=degree_sheet_names,
    header=None,
)

# Expose one DataFrame per (workbook, sheet) under the names later cells use.
mencommreg_numpars1 = mencommreg_sheets["numpars1"]
mencommreg_totalmen = mencommreg_sheets["totalmen"]
num_bmsm_numpars1 = num_bmsm_sheets["numpars1"]
num_bmsm_totalmen = num_bmsm_sheets["totalmen"]

In [4]:
# Raw tables keyed by "<social>_<sexual>": the text after the last underscore
# is the sexual-network label, everything before it is the social-network label.
dataframes = {
    "mencommreg_numpars1": mencommreg_numpars1,
    "mencommreg_totalmen": mencommreg_totalmen,
    "num_bmsm_numpars1": num_bmsm_numpars1,
    "num_bmsm_totalmen": num_bmsm_totalmen,
}

degree_sequence_frames = []
for label, table in dataframes.items():
    social, sexual = label.rsplit("_", 1)

    # Drop the first and last row and column before reconstruction
    # (presumably label and total margins -- confirm against the workbooks).
    inner_table = table.iloc[1:-1, 1:-1]
    sequences = src.data.reconstruct_degree_sequences(inner_table)

    # Two rows per table pair: the first sequence is filed under the social
    # label and the second under the sexual label.
    pair_frame = pd.DataFrame({
        "social_network": [social] * 2,
        "sexual_network": [sexual] * 2,
        "network": [social, sexual],
        "degree_sequence": [sequences[0], sequences[1]],
    })
    degree_sequence_frames.append(pair_frame)

# Stack all pairs into one table with a fresh 0..n-1 index.
degree_sequences_data = pd.concat(degree_sequence_frames, ignore_index=True)

In [5]:
# Write the combined degree-sequence table to the processed data directory.
degree_sequences_path = "../../data/uconnect/processed/degree_sequences_uconnect.parquet"
degree_sequences_data.to_parquet(degree_sequences_path)

**Population Density**

In [6]:
# Raw inputs: one CSV with census-block boundaries and one with per-block
# population counts. Both are loaded unmodified; cleaning happens in the next cell.
boundaries_path = "../../data/uconnect/raw/CensusBlockTIGER2010_20250514.csv"
population_path = "../../data/uconnect/raw/Population_by_2010_Census_Block_20250514.csv"

boundaries_data = pd.read_csv(boundaries_path)
population_data = pd.read_csv(population_path)

In [7]:
# Give both raw tables a shared key name (`census_block_id`) and keep only the
# key plus the single payload column needed from each.
boundaries_data = (
    boundaries_data
    .rename(columns={"the_geom": "geometry", "GEOID10": "census_block_id"})
    .loc[:, ["geometry", "census_block_id"]]
)
population_data = (
    population_data
    .rename(columns={"TOTAL POPULATION": "population", "CENSUS BLOCK FULL": "census_block_id"})
    .loc[:, ["population", "census_block_id"]]
)

# Inner join on the block id, then keep the first row for each block id.
merged_blocks = (
    boundaries_data
    .merge(population_data, on="census_block_id")
    .drop_duplicates(subset=["census_block_id"])
)

# Parse the WKT strings with geopandas' vectorized reader. The previous
# row-wise `shapely.wkt.loads` call only resolved because another import had
# already loaded the `shapely.wkt` submodule; a bare `import shapely` does not
# guarantee that attribute exists. `from_wkt` keeps the Series index, so the
# geometries stay aligned with the de-duplicated rows.
block_geometry = gpd.GeoSeries.from_wkt(merged_blocks["geometry"])

# Final layout: census_block_id, population, geometry, tagged as EPSG:4326.
population_density_data = gpd.GeoDataFrame(
    merged_blocks[["census_block_id", "population"]],
    geometry=block_geometry,
    crs="EPSG:4326",
)

In [8]:
# Write the per-block population GeoDataFrame to the processed data directory
# as GeoJSON.
population_density_path = "../../data/uconnect/processed/population_density_uconnect.geojson"
population_density_data.to_file(population_density_path, driver="GeoJSON")