# Produce Geo x File Matrix

In [None]:
#import functions
import os
import sys

# Put the parent of the current working directory on the import path.
# The membership check keeps the cell idempotent: re-running it does not
# pile up duplicate entries in sys.path.
_parent_dir = os.path.abspath(os.path.join(os.path.curdir, os.path.pardir))
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

#censusdis
from collections import OrderedDict

import geopandas as gpd
import matplotlib.pyplot as plt

from typing import Optional

import censusdis.data as ced
import censusdis.maps as cem
import censusdis.values as cev
import censusdis.geography as cgeo
from censusdis import states
from censusdis.maps import ShapeReader, plot_us_boundary
import censusdis.maps as cmap


# Make sure it is there.
from censusdis.values import ALL_SPECIAL_VALUES

# _______________________________________________________________________

#standard packages
import pandas as pd
import numpy as np
import math
import glob
from math import pi, sqrt

#plotting
import plotly.express as px
import matplotlib.pyplot as plt
# import pygwalker as pyg

# import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import datetime
import time
from tqdm import tqdm, trange

#gis packages
import osmnx as ox
import logging as lg
# Echo OSMnx log messages to the console (this flag controls logging, not caching).
ox.settings.log_console=True
# ox.settings.memory_cache=True #use cache to avoid overloading the server
# NOTE(review): OSMnx's caching switch appears to be `use_cache`, not `memory_cache` - confirm before re-enabling the line above.
# 4294967296 bytes = 4 GiB.
# NOTE(review): per the OSMnx settings documentation, `memory` is the memory allocation
# requested from the Overpass server for a query, not a local cache size - confirm this is the intent.
ox.settings.memory = 4294967296

from shapely.geometry import Point
import folium
import networkx as nx


## future libraries
# import contextily as cx
# import fiona
# from pandana.loaders import osm
# import momepy
# import missingno as msno
# from us import states
# import imageio

import warnings
warnings.filterwarnings("ignore")

%matplotlib inline
ox.__version__

# Notebook-wide pandas display settings, applied in one pass.
for _option_name, _option_value in (
    ('display.max_colwidth', 50),
    ('display.width', 1000),
    ('display.max_columns', None),
    ('display.max_rows', 10),
):
    pd.set_option(_option_name, _option_value)

# Get Data from Summary Files

In [None]:
# IMPORT MATRIX FILE

# Location of the geo x file matrix CSV. Kept as a plain string: the original
# used an f-string with no placeholders, which did nothing.
# NOTE(review): this is an absolute, machine-specific path; consider making it configurable.
MATRIX_CSV_PATH = 'C:/Users/jerem/OneDrive/Documents/Git Projects/MeridianXYZ/data/census/acs/summary files/2022/Meridian_ACS522_Geos20225YR_Matrix.csv'

matrix = pd.read_csv(MATRIX_CSV_PATH)

In [None]:
# Preview the first ten rows of the matrix.
matrix.head(n=10)

In [None]:
# SET VARIABLES
# Dataset and year used for the censusdis calls in this notebook.
DATASET, YEAR = "acs/acs5", 2021
# YEAR_GEO = 2021 # no geo in 2022

# Variables to request: the NAME field plus the variable held in TOTAL_POPULATION.
TOTAL_POPULATION = "B01001_001E"
VARIABLES = ["NAME", TOTAL_POPULATION]

# Shapefile reader for the same year (required for censusdis).
reader = cem.ShapeReader(year=YEAR)

# GET BOUNDARIES FOR ALL STATES
# Read the "us" / "state" shapefile through the reader, then build a mask of
# the rows whose STATEFP is listed in states.ALL_STATES_AND_DC.
us_state_shapes = reader.read_cb_shapefile("us", "state")
is_state_or_dc = us_state_shapes["STATEFP"].isin(states.ALL_STATES_AND_DC)

# Keep the matching rows and make sure the GeoDataFrame is in EPSG:4269.
gdf_state_bounds = us_state_shapes[is_state_or_dc].to_crs(epsg=4269)

# ESTABLISH GEOGRAPHIC REGION OF ANALYSIS

# START WITH CBSAs
# "*" is passed for the metropolitan/micropolitan statistical area geography,
# and with_geometry=True requests geometry alongside the variables.
cbsa_request = {
    "metropolitan_statistical_area_micropolitan_statistical_area": "*",
    "with_geometry": True,
}
gdf_cbsa = ced.download(DATASET, YEAR, VARIABLES, **cbsa_request)

# Show the (rows, columns) shape, then the frame itself.
print(gdf_cbsa.shape)
gdf_cbsa

In [None]:
# Re-index the CBSA frame by its NAME column to produce the frame used for mapping.
gdf_cbsa_map = gdf_cbsa.set_index(keys='NAME')
gdf_cbsa_map

In [None]:
# Choropleth of the CBSA frame on an OpenStreetMap basemap.

# Initial map centre (a point in the central United States).
US_MAP_CENTER = {"lat": 38.191743794717325, "lon": -97.56042861287186}

fig = px.choropleth_mapbox(
    gdf_cbsa_map,
    geojson=gdf_cbsa_map.geometry,
    locations=gdf_cbsa_map.index,  # features are matched to rows through the frame's index
    color=TOTAL_POPULATION,  # shared constant instead of repeating the literal "B01001_001E"
    center=US_MAP_CENTER,
    mapbox_style="open-street-map",
    zoom=2,
    width=1000,
    height=800,
)

# Margins around the plot and a gray page background.
fig.update_layout(
    margin=dict(l=20, r=20, t=40, b=20, pad=10),
    paper_bgcolor="Gray",
)
fig.show()

In [None]:
# Load the variables-list parquet file. The path is a plain string: the
# original used an f-string with no placeholders, which did nothing.
# NOTE(review): this is an absolute, machine-specific path; consider making it configurable.
VARIABLES_LIST_PARQUET_PATH = 'C:/Users/jerem/OneDrive/Documents/Git Projects/MeridianXYZ/data/census/acs/summary files/2022/Meridian_variables_list_df.parquet'

df = pd.read_parquet(VARIABLES_LIST_PARQUET_PATH)
df

In [None]:
# NOTE: the purpose of this cell was flagged as unclear by its author.

# DOWNLOAD DATA BASED ON GEOLEVELS, STORE INTO DIFFERENT DATA FRAMES

# Single-column frame holding the CBSA values, without the rows where CBSA is missing.
matrix_cbsa = matrix.loc[:, ['CBSA']].dropna()

# Row counts before and after dropping the missing values, one per line.
print(len(matrix), len(matrix_cbsa), sep="\n")
matrix_cbsa

# GATHER DATA FROM OSM BASED ON THE GEOGRAPHIC AREA OF INTEREST


# CREATE FEATURE SELECTION BASED ON DIFFERENT TYPES OF MEASUREMENT CRITERIA


# CREATE NATIONAL AND REGIONAL STATISTICAL COMPARISONS


# CREATE PERFORMANCE CRITERIA MEASUREMENTS


# CREATE TYPOLOGIES BASED ON PERFORMANCE CRITERIA MEASUREMENTS

# Tutorial from here down

In [None]:
# Load the ACS20225YR table-shells workbook (sheet 'ACS20225YR_Table_Shells').
# NOTE(review): the variable name suggests a "General Model" subset, but this
# cell only reads the sheet; no row filtering is applied here.
shells_workbook_path = 'C:/Users/jerem/OneDrive/Documents/Git Projects/MeridianXYZ/data/census/acs/summary files/2022/ACS20225YR_Table_Shells.xlsx'
acs22_5_shell_genmodel = pd.read_excel(
    shells_workbook_path,
    sheet_name='ACS20225YR_Table_Shells',
)

In [None]:

# Minimal bar chart, written to an HTML file that is opened automatically.
bar_labels = ["a", "b", "c"]
bar_heights = [1, 3, 2]
fig = px.bar(x=bar_labels, y=bar_heights)
fig.write_html('first_figure.html', auto_open=True)

In [None]:
# Same minimal bar chart, rendered in the notebook.
bar_labels = ["a", "b", "c"]
bar_heights = [1, 3, 2]
fig = px.bar(x=bar_labels, y=bar_heights)
fig.show()

In [None]:
# Scatter of sepal width against sepal length from the iris sample data,
# coloured by species.
df = px.data.iris()
fig = px.scatter(
    data_frame=df,
    x="sepal_width",
    y="sepal_length",
    color="species",
)
fig.show()

In [None]:

# Iris scatter again, now with marginal plots (violin on x, box on y),
# an "ols" trendline and the "simple_white" template.
df = px.data.iris()
scatter_options = dict(
    x="sepal_width",
    y="sepal_length",
    color="species",
    marginal_x="violin",
    marginal_y="box",
    trendline="ols",
    template="simple_white",
)
fig = px.scatter(df, **scatter_options)
fig.show()

In [None]:
# Election sample data from Plotly Express and its accompanying GeoJSON.
df = px.data.election()
geojson = px.data.election_geojson()

# Colour by the "Bergeron" column; rows are matched to GeoJSON features
# through the "district" column and the features' properties.district key.
fig = px.choropleth_mapbox(
    data_frame=df,
    geojson=geojson,
    featureidkey="properties.district",
    locations="district",
    color="Bergeron",
    mapbox_style="carto-positron",
    center=dict(lat=45.5517, lon=-73.7073),
    zoom=9,
    height=800,
    width=800,
)

# Margins around the plot and a gray page background.
fig.update_layout(
    paper_bgcolor="Gray",
    margin={"l": 20, "r": 20, "t": 40, "b": 20, "pad": 10},
)
fig.show()