In [None]:
import geopandas as gpd
import pandas as pd

In [None]:
# Paths to the zipped datasets that later cells open with gpd.read_file:
# countries (admin 0), states/provinces (admin 1) and populated places.
# NOTE(review): the "ne_10m_*" file names look like Natural Earth 1:10m layers — confirm the source.
FP_NE_ADMIN0 = "data/ne/ne_10m_admin_0_countries.zip"
FP_NE_ADMIN1 = "data/ne/ne_10m_admin_1_states_provinces.zip"
FP_NE_CITIES = "data/ne/ne_10m_populated_places_simple.zip"

## Read from Google Sheets

In [None]:
def load_sheet(
    refresh=False,
    sheet_id="12I5GR8v8H1LVXtjYX5S-v6zuO0up_aY3D3BqplzSu4s",
    sheet_num="0",
    save_path="data/olivia_cities.csv",
):
    """
    Return the sheet contents as a DataFrame read from ``save_path``.

    refresh   -- when truthy, first download the sheet through the Google
                 Sheets CSV export URL and overwrite ``save_path`` with it
                 (written without the index column)
    sheet_id  -- document id placed in the export URL
    sheet_num -- value of the ``gid`` query parameter in the export URL
    save_path -- location of the local CSV cache

    The returned frame always comes from the cached CSV, so a refreshed and
    a non-refreshed call go through the same parsing step.
    """
    # Fast path: nothing to download, just read the cache.
    if not refresh:
        return pd.read_csv(save_path)

    export_url = f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?gid={sheet_num}&format=csv"
    pd.read_csv(export_url).to_csv(save_path, index=False)

    # Read back what was just written rather than returning the download.
    return pd.read_csv(save_path)

In [None]:
# df = load_sheet(refresh=True)

In [None]:
def prepare_data():
    """Load the cached sheet and keep only the first row for each ``name``."""
    # TODO: replace with groupby, retaining earliest date and any bike
    return load_sheet(refresh=False).drop_duplicates("name")

### Cleanup

In [None]:
def keep_first(df, col):
    """
    Remove duplicates of ``col``, keeping the row with the earliest date.

    df  -- frame holding at least the columns ``col``, "biked?" and "date";
           it is left untouched
    col -- name of the column whose values must end up unique

    Returns a new frame with the columns [col, "biked?", "date"], ordered by
    date, where "date" has been parsed with ``pd.to_datetime``.
    """

    # Take a real copy. ``pd.DataFrame(df)`` does not copy the data of an
    # existing frame, so assigning the parsed dates could alter the caller's
    # frame (or warn when ``df`` is a slice of another frame).
    ordered = df.copy()

    # Parse and order by date. A stable sort keeps rows sharing a date in
    # their original order, so "first" is deterministic for ties.
    ordered["date"] = pd.to_datetime(ordered["date"])
    ordered = ordered.sort_values("date", kind="mergesort")

    # return simplified: after sorting, the first occurrence is the earliest
    return ordered[[col, "biked?", "date"]].drop_duplicates(col, keep="first")

## Get US States, non-US countries

In [None]:
# Keyword arguments shared by the .explore() map calls below.
map_args = dict(tiles="CartoDB positron")

In [None]:
# Cached sheet reduced to one row per distinct "name" (see prepare_data).
df = prepare_data()
# Last expression of the cell, so the (rows, columns) tuple is displayed.
df.shape

In [None]:
# Split on the country column: rows whose "adm0name" is exactly the USA label
# go to visited_usa, every other row (including rows with a missing country)
# goes to visited_abroad.
visited_usa, visited_abroad = (
    df.loc[df["adm0name"].eq("United States of America")],
    df.loc[df["adm0name"].ne("United States of America")],
)

### Countries (admin0)

In [None]:
def merge_countries(countries, visited):
    """
    Return the rows of ``countries`` whose "ADMIN" value occurs in
    ``visited["adm0name"]``.

    countries -- frame with an "ADMIN" column; any index is accepted
    visited   -- frame with an "adm0name" column; its other columns are ignored

    The result is a positional selection from ``countries``, so it keeps that
    frame's type, columns and index labels. Matching is done with an inner
    merge, so a name listed several times in ``visited`` yields the matching
    row that many times. Missing names never count as a match.
    """
    # Only the join key is needed from `visited`. Dropping nulls matters
    # because pandas.merge pairs null keys with each other.
    wanted = visited[["adm0name"]].dropna()

    # Remember every row's *position*. The index labels are not usable with
    # .iloc unless the index happens to be 0..n-1, and a private column name
    # cannot clash with an "index" column.
    positions = pd.DataFrame(
        {"ADMIN": countries["ADMIN"].to_numpy(), "_pos": range(len(countries))}
    )

    matched = positions.merge(wanted, left_on="ADMIN", right_on="adm0name", how="inner")
    return countries.iloc[matched["_pos"].astype("int64").to_numpy()]

In [None]:
# get countries visited (earliest visit per country); only a missing country
# name disqualifies a row — a blank "biked?" or date must not hide a country
v = keep_first(visited_abroad, "adm0name").dropna(subset=["adm0name"])

# get countries dataset
countries = gpd.read_file(FP_NE_ADMIN0)

In [None]:
# Rows of `countries` whose "ADMIN" name matches a visited country in `v`.
f = merge_countries(countries, v)

In [None]:
# Interactive map of the matched countries; the "ADMIN" column is used for the
# hover tooltip and the shared map_args supply the tile layer.
m = f.explore(
    tooltip="ADMIN",
    **map_args
)
# An assignment produces no cell output, so end the cell with the map itself.
m

### US States (admin1)

In [None]:
def merge_regions(regions, visited):
    """
    Return the rows of ``regions`` whose "name" value occurs in
    ``visited["adm1name"]``.

    regions -- frame with a "name" column; any index is accepted
    visited -- frame with an "adm1name" column; its other columns are ignored

    The result is a positional selection from ``regions``, so it keeps that
    frame's type, columns and index labels. Matching is done with an inner
    merge, so a name listed several times in ``visited`` yields the matching
    row that many times. Missing names never count as a match.

    NOTE(review): matching uses the region name only; if ``regions`` covers
    several countries, same-named regions elsewhere would match too — verify.
    """
    # Only the join key is needed from `visited`. Dropping nulls matters
    # because pandas.merge pairs null keys with each other, which would pull
    # in every unnamed region.
    wanted = visited[["adm1name"]].dropna()

    # Remember every row's *position*. The index labels are not usable with
    # .iloc unless the index happens to be 0..n-1, and a private column name
    # cannot clash with an "index" column.
    positions = pd.DataFrame(
        {"name": regions["name"].to_numpy(), "_pos": range(len(regions))}
    )

    matched = positions.merge(wanted, left_on="name", right_on="adm1name", how="inner")
    return regions.iloc[matched["_pos"].astype("int64").to_numpy()]

In [None]:
# Earliest visit per "adm1name" among the US rows. Rows without a state name
# are dropped, as the countries cell does for country names: pandas.merge
# pairs null keys with each other, so a missing name could otherwise match
# unnamed regions.
v = keep_first(visited_usa, "adm1name").dropna(subset=["adm1name"])

In [None]:
# States/provinces layer; merge_regions matches visits against its "name" column.
# NOTE(review): this file presumably covers every country, so a name-only match
# may also select same-named regions outside the US — verify.
regions = gpd.read_file(FP_NE_ADMIN1)

In [None]:
# export

# readable_list = ["adm1_code", "iso_a2", "name", "name_alt", "region", "type", "type_en", "postal", "adm0_a3", "iso_3166_2"]
# regions[readable_list].to_csv("admin1.csv")

In [None]:
# Rows of `regions` whose "name" matches a visited "adm1name" in `v`.
merged_regions = merge_regions(regions, v)
# Last expression of the cell, so the (rows, columns) tuple is displayed.
merged_regions.shape

In [None]:
# Interactive map of the matched regions. Reuses the shared map_args (same
# tile layer as the countries map) instead of repeating the value here.
merged_regions.explore(
    **map_args
)

## Other

### Find visited places missing from the cities dataset

In [None]:
# `gdf` is otherwise only assigned in the Cities section further down, so load
# the populated-places layer here to keep this cell runnable on a fresh
# top-to-bottom run.
gdf = gpd.read_file(FP_NE_CITIES)

# Right join: every row of `df` is kept; rows with no counterpart in `gdf`
# come back with nulls in the gdf-only columns, which exposes unmatched places.
merged = gdf.merge(df, on=["name", "adm0name"], how='right')

In [None]:
# merged[merged['index_x'].isna()]

In [None]:
# Inner join (the default `how`) on place name + country: keeps only rows
# present in both `gdf` and `df`. Requires both to exist in the kernel already
# and overwrites the previous `merged`.
merged = gdf.merge(df, on=["name", "adm0name"])

In [None]:
# Stricter inner join: place name, "adm1name" and country must all agree.
# Requires `gdf` and `df` to exist in the kernel already and overwrites the
# previous `merged`.
merged = gdf.merge(df, on=["name", "adm1name", "adm0name"])

## Cities

In [None]:
# pd.set_option("display.max.columns", None)

In [None]:
# Reload the cached sheet (this rebinds `df`), move the row labels into a
# regular column via reset_index and parse "date" into datetimes.
df = (
    load_sheet(refresh=False)
    .reset_index()
    .assign(date=lambda sheet: pd.to_datetime(sheet["date"]))
)

In [None]:


# Load the populated-places layer. `gdf` is bound to the same, untrimmed
# object as `gdf_full`; the column-trimming variant is kept below, disabled.
# gdf = gdf_full.reset_index().loc[:, ["index", "name", "adm1name", "adm0name"]]
gdf = gdf_full = gpd.read_file(FP_NE_CITIES)

In [None]:
# gdf_full[gdf_full["NAME"] == "Helsinki"]