# Combine district CSVs into a timeseries file

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import glob
import os
import json

### Get all the district and charter CSV files

In [8]:
# Base directory that contains the "data" folder; an empty string means the
# current working directory.
path = ""
# glob returns matches in arbitrary, OS-dependent order. Sort by file name so
# the rows of the combined file come out in the same order on every run.
files = sorted(glob.glob(os.path.join(path, "data/districts-2021*.csv")))

In [9]:
# glob returns matches in arbitrary, OS-dependent order. Sort by file name so
# the rows of the combined charter file come out in the same order on every run.
charter_files = sorted(glob.glob(os.path.join(path, "data/charters-2021*.csv")))

### Read them and create a date field

In [10]:
# Read every district file and tag its rows with the file name they came from.
# A list is used instead of a generator: a generator is exhausted by the first
# pd.concat, so re-running the concat cell would fail with nothing to combine.
file_df = [
    pd.read_csv(f, low_memory=False).assign(date=os.path.basename(f)) for f in files
]

In [11]:
# Read every charter file and tag its rows with the file name they came from.
# A list is used instead of a generator: a generator is exhausted by the first
# pd.concat, so re-running the concat cell would fail with nothing to combine.
charter_df = [
    pd.read_csv(f, low_memory=False).assign(date=os.path.basename(f))
    for f in charter_files
]

### Concatenate all the files

In [12]:
# Stack the per-file district frames into one frame, discarding each file's
# own row index in favour of a single fresh one.
concatenated_df = pd.concat(objs=file_df, ignore_index=True)

In [13]:
# Stack the per-file charter frames into one frame, discarding each file's
# own row index in favour of a single fresh one.
concatenated_charters_df = pd.concat(objs=charter_df, ignore_index=True)

### Clean up the date field and column names

In [14]:
# The "date" column currently holds the source file name; remove the literal
# "districts-" prefix and ".csv" extension, keeping what lies between them.
_district_no_prefix = concatenated_df["date"].str.replace(
    "districts-", "", regex=False
)
concatenated_df["date"] = _district_no_prefix.str.replace(".csv", "", regex=False)

In [15]:
# Normalise the column labels in three steps: drop literal dots, lowercase,
# then drop the substring "attributes".
_district_cols = concatenated_df.columns.str.replace(".", "", regex=False)
_district_cols = _district_cols.str.lower()
concatenated_df.columns = _district_cols.str.replace("attributes", "", regex=False)

In [16]:
# The "date" column currently holds the source file name; remove the literal
# "charters-" prefix and ".csv" extension, keeping what lies between them.
_charter_no_prefix = concatenated_charters_df["date"].str.replace(
    "charters-", "", regex=False
)
concatenated_charters_df["date"] = _charter_no_prefix.str.replace(
    ".csv", "", regex=False
)

In [17]:
# Normalise the column labels in three steps: drop literal dots, lowercase,
# then drop the substring "attributes".
_charter_cols = concatenated_charters_df.columns.str.replace(".", "", regex=False)
_charter_cols = _charter_cols.str.lower()
concatenated_charters_df.columns = _charter_cols.str.replace(
    "attributes", "", regex=False
)

### Clean up the Unix timestamp columns

In [37]:
# Columns of the district data that get parsed as Unix timestamps.
district_dates = ["creationdate", "editdate"] + [
    "nearterm_reopening_date",
    "elem_reopen_dt",
    "sec_reopen_dt",
]

In [38]:
# Columns of the charter data that get parsed as Unix timestamps.
charter_dates = ["creationdate", "editdate", "reopening_date"]

In [39]:
def parse_dates(df, dates, unit="ms"):
    """Convert Unix-epoch columns to datetimes and add date-only companions.

    Each column named in ``dates`` is overwritten with the result of
    ``pd.to_datetime(column, unit=unit)``, and a new ``<name>_clean`` column
    holding only the calendar date of that timestamp is added.

    Args:
        df: DataFrame containing every column named in ``dates``. It is
            modified in place.
        dates: Iterable of column names whose values are epoch offsets.
        unit: Epoch unit handed to ``pd.to_datetime``. Defaults to ``"ms"``
            (milliseconds), the unit this function always used before the
            parameter existed.

    Returns:
        The same ``df`` object that was passed in.

    Raises:
        KeyError: If a name in ``dates`` is not a column of ``df``.
    """
    for col in dates:
        # Parse once and reuse the result for both the overwritten column and
        # the date-only companion.
        parsed = pd.to_datetime(df[col], unit=unit)
        df[col] = parsed
        df[f"{col}_clean"] = parsed.dt.date
    return df

In [40]:
# parse_dates converts the listed columns on the frame it is given and returns
# that same frame, so `df` refers to the same object as `concatenated_df`.
df = parse_dates(df=concatenated_df, dates=district_dates)

In [41]:
# parse_dates converts the listed columns on the frame it is given and returns
# that same frame, so `charters_final` refers to the same object as
# `concatenated_charters_df`.
charters_final = parse_dates(df=concatenated_charters_df, dates=charter_dates)

---

## Export

In [43]:
# Write the combined district rows to CSV, leaving out the row index.
df.to_csv(path_or_buf="data/districts-all-timeseries.csv", index=False)

In [44]:
# Write the combined charter rows to CSV, leaving out the row index.
charters_final.to_csv(path_or_buf="data/charters-all-timeseries.csv", index=False)