Run this notebook to download data from SafeGraph.

In [1]:
import pandas as pd
import sys, os
from csv import writer
sys.path.append('utils/')
import tqdm
#from census_tract_locator import *

In [2]:
import numpy as np
import json
import base64
import requests
from getpass import getpass
import geopandas as gpd
from shapely.geometry import Polygon

In [3]:
from helper_funcs import *
dirname = 'data/nyc_metro/'
createDir(dirname)
tract_file_path = '../Tracts/nyc_metro_boundaries/nyc_metro_boundaries.shp'

# this is used to filter the mobility rows that we want
boundary_data_for_filtering = gpd.read_file(tract_file_path)
boundary_data_for_filtering = boundary_data_for_filtering[['GEOID', 'state_abbr', 'geometry']]
boundary_data_for_filtering.GEOID = boundary_data_for_filtering.GEOID.astype(str)
boundary_data_for_filtering.head(2)

Unnamed: 0,GEOID,state_abbr,geometry
0,9003514500,CT,"POLYGON ((-72.56218 41.78077, -72.56122 41.780..."
1,9003514600,CT,"POLYGON ((-72.55995 41.77014, -72.55994 41.770..."


In [4]:
def get_tract_num(tract_data, point):
    if not tract_data['geometry'].contains(point).any():
        return np.nan
    tract_info = tract_data.loc[tract_data['geometry'].contains(point), ['GEOID']]
    tract_info = tract_info.iat[0, 0]
    print('found point inside')

    return tract_info

def get_geoids_and_filter(df, tract_data=boundary_data_for_filtering):
    df['GEOID'] = np.nan
    df.loc[df['poi_cbg'].notna(), 'poi_cbg'].apply(lambda x: print(f'CBG length incorrect: {x}') if len(x) != 12 else None)
    df.loc[df['poi_cbg'].notna(), 'GEOID'] = df.loc[df['poi_cbg'].notna(), 'poi_cbg'].apply(lambda x: x[0:-1])

    # get the tract that the POIs are located within
    cp = df.loc[df.GEOID.isna()]
    point = gpd.points_from_xy(x=cp['longitude'], y=cp['latitude'])
    points = gpd.GeoSeries(point, index=cp.index)

    cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
    df.loc[df.GEOID.isna(), 'GEOID'] = cp['GEOID']
    
    df.drop(index=df[df.GEOID.isna()].index, inplace=True)
    return df

In [5]:
boundary_data_for_filtering.state_abbr.unique()

array(['CT', 'PA', 'NY', 'NJ'], dtype=object)

In [6]:
def get_access_token(username, pword):
  
  credentials = f"{username}:{pword}" # Format credentials according to the API's expectations
  # print(f"Credentials: {credentials}")

  # Convert credentials to base64
  credentials_bytes = credentials.encode('ascii') 
  base64_credentials_bytes = base64.b64encode(credentials_bytes)
  base64_credentials = base64_credentials_bytes.decode('ascii')
  # print(f"Base64 credentials: {base64_credentials}")

  # Request access token
  headers = {
      'accept': 'application/json',
      'Authorization': f'Basic {base64_credentials}'
  }
  response = requests.post("https://marketplace.deweydata.io/api/auth/tks/get_token", headers=headers)
  response_access_token = response.json()['access_token']
  # print(f"Access token: {response_access_token}")

  return response_access_token


# Recursive function that returns the paths to each file available
def get_file_paths(token, path=""):
  headers = {
      'accept': 'application/json',
      'Authorization': f'Bearer {token}'
  }

  response = requests.get(f"https://marketplace.deweydata.io/api/data/v2/list/{path}", headers=headers)
  response_df = pd.DataFrame(response.json())
  if response_df.shape[0] > 0:
    files = response_df[response_df['directory']==False]
    folders = response_df[response_df['directory']==True]


    for name in folders.name.unique():
      new_files, new_folders = get_file_paths(token, path=f"{path}{name}/")
      files = pd.concat([files, new_files], sort=True)
      folders = pd.concat([folders, new_folders], sort=True)

    return files, folders
  return pd.DataFrame(), pd.DataFrame()


def read_file(data_url, cols, token, compression='gzip'):
  base_url = "https://marketplace.deweydata.io"
  full_url = f"{base_url}{data_url}"

  headers = {
      'accept': 'application/json',
      'Authorization': f'Bearer {token}'
  }

  df = pd.read_csv(full_url, storage_options=headers, compression='gzip', usecols=cols, dtype={'latitude':  np.float64, 'longitude':  np.float64, 'poi_cbg': str})

  return df

def process_dataframe(df):
    # drop if not in the correct state
    df.drop(index=df[~df.region.isin(boundary_data_for_filtering.state_abbr.unique())].index, inplace=True)

    # Drop row if there is a parent placekey (I checked and all parent placekeys do match up with a row placekey). Visits are attributed to parent AND children
    df.drop(index=df[df.parent_placekey.notna()].index, inplace=True)
    
    # drop if no visits
    df.drop(index=df[df['visitor_home_aggregation'].isna()].index, inplace=True)
    
    # get data in the tracts that i want
    df = get_geoids_and_filter(df)
    
    df = df.loc[df['GEOID'].isin(boundary_data_for_filtering.GEOID.unique())]
    return df

def request_files_and_save_to_drive(files_df, cols, token, month_counter, dir_path):
  for i, file_name in tqdm.tqdm(enumerate(files_df.url.unique())):
    tmp_df = read_file(file_name, cols, token)
    print(f'\nRead file {i}.')

    tmp_df = process_dataframe(tmp_df,)

    ah = os.path.exists(f'{dir_path}{month_counter}.csv')

    tmp_df.to_csv(f"{dir_path}{month_counter}.csv", mode='a', index=False, header=not os.path.exists(f'{dir_path}{month_counter}.csv'))

    
    print(f'Saved {tmp_df.shape[0]} rows.')
  
  print(f"--------done: month {month_counter}--------")
  return True

In [7]:
un = getpass('Enter your Dewey Marketplace username (email): ')
pw = getpass('Enter your Dewey Marketplace password: ')

Enter your Dewey Marketplace username (email): ········
Enter your Dewey Marketplace password: ········


In [8]:
access_token = get_access_token(un, pw)
print("done")
print(f"Access token: {access_token}")

rsp = get_file_paths(access_token)[0]

done
Access token: EV0eNuWPa9sxX453aExyxfl3Hhs


In [9]:
# for i, file_name in tqdm.tqdm(enumerate(spend_sample.url.unique())):
#     print(file_name)

In [10]:
cols=['placekey','parent_placekey', 'date_range_start', 'date_range_end', 'longitude', 'latitude', 'city', 'region', 'iso_country_code', 'visitor_home_aggregation' , 'poi_cbg']


for i, month_cd in enumerate(['07', '08', '09', '10', '11', '12']):
    spend_sample = rsp[rsp["parent"].str.endswith("SAFEGRAPH/WP")]
    spend_sample = spend_sample[spend_sample["name"].str.contains('csv.gz')]

    s = '2021/' + month_cd
    spend_sample = spend_sample[spend_sample["parent"].str.contains(s)]

    request_files_and_save_to_drive(spend_sample, cols, access_token, month_counter=month_cd, dir_path=dirname)

0it [00:00, ?it/s]


Read file 0.


  s = pd.Series(data, index=index, name=name, **kwargs)
1it [00:17, 17.07s/it]

Saved 11359 rows.

Read file 1.


  s = pd.Series(data, index=index, name=name, **kwargs)
2it [00:33, 16.84s/it]

Saved 11297 rows.

Read file 2.


  s = pd.Series(data, index=index, name=name, **kwargs)
3it [00:48, 15.78s/it]

Saved 11443 rows.

Read file 3.


  s = pd.Series(data, index=index, name=name, **kwargs)
4it [01:25, 24.18s/it]

Saved 11290 rows.

Read file 4.


  s = pd.Series(data, index=index, name=name, **kwargs)
5it [01:41, 21.36s/it]

Saved 11472 rows.

Read file 5.


  s = pd.Series(data, index=index, name=name, **kwargs)
6it [01:58, 19.95s/it]

Saved 11444 rows.

Read file 6.


  s = pd.Series(data, index=index, name=name, **kwargs)
7it [02:16, 19.09s/it]

Saved 11736 rows.

Read file 7.


  s = pd.Series(data, index=index, name=name, **kwargs)
8it [02:34, 18.73s/it]

Saved 11595 rows.

Read file 8.


  s = pd.Series(data, index=index, name=name, **kwargs)
9it [03:03, 21.92s/it]

CBG length incorrect: CA:59150078
Saved 15726 rows.

Read file 9.


  s = pd.Series(data, index=index, name=name, **kwargs)
10it [03:23, 21.60s/it]

Saved 11599 rows.

Read file 10.


  s = pd.Series(data, index=index, name=name, **kwargs)
11it [03:44, 21.28s/it]

Saved 11546 rows.

Read file 11.


  s = pd.Series(data, index=index, name=name, **kwargs)
12it [04:21, 26.18s/it]

Saved 11126 rows.

Read file 12.


  s = pd.Series(data, index=index, name=name, **kwargs)
13it [04:41, 24.22s/it]

Saved 11620 rows.

Read file 13.


  s = pd.Series(data, index=index, name=name, **kwargs)
14it [05:01, 22.76s/it]

Saved 11324 rows.

Read file 14.


  s = pd.Series(data, index=index, name=name, **kwargs)
15it [05:20, 21.85s/it]

Saved 11362 rows.

Read file 15.


  s = pd.Series(data, index=index, name=name, **kwargs)
16it [05:41, 21.37s/it]

Saved 11551 rows.

Read file 16.


  s = pd.Series(data, index=index, name=name, **kwargs)
17it [06:04, 22.05s/it]

Saved 11656 rows.

Read file 17.


  s = pd.Series(data, index=index, name=name, **kwargs)
18it [06:22, 20.90s/it]

Saved 11449 rows.

Read file 18.


  s = pd.Series(data, index=index, name=name, **kwargs)
19it [06:40, 19.88s/it]

Saved 11452 rows.

Read file 19.


  s = pd.Series(data, index=index, name=name, **kwargs)
20it [06:58, 19.39s/it]

Saved 11270 rows.

Read file 20.


  s = pd.Series(data, index=index, name=name, **kwargs)
21it [07:23, 21.01s/it]

Saved 11310 rows.

Read file 21.


  s = pd.Series(data, index=index, name=name, **kwargs)
22it [07:43, 20.69s/it]

Saved 11425 rows.

Read file 22.


  s = pd.Series(data, index=index, name=name, **kwargs)
23it [08:02, 20.23s/it]

Saved 11486 rows.

Read file 23.


  s = pd.Series(data, index=index, name=name, **kwargs)
24it [08:23, 20.35s/it]

Saved 11372 rows.

Read file 24.


  s = pd.Series(data, index=index, name=name, **kwargs)
25it [08:43, 20.42s/it]

Saved 11253 rows.

Read file 25.


  s = pd.Series(data, index=index, name=name, **kwargs)
26it [09:04, 20.64s/it]

Saved 11247 rows.

Read file 26.


  s = pd.Series(data, index=index, name=name, **kwargs)
27it [09:24, 20.21s/it]

Saved 11371 rows.

Read file 27.


  s = pd.Series(data, index=index, name=name, **kwargs)
28it [09:42, 19.63s/it]

Saved 11180 rows.

Read file 28.


  s = pd.Series(data, index=index, name=name, **kwargs)
29it [10:00, 19.25s/it]

Saved 11469 rows.

Read file 29.


  s = pd.Series(data, index=index, name=name, **kwargs)
30it [10:18, 18.89s/it]

Saved 11353 rows.

Read file 30.


  s = pd.Series(data, index=index, name=name, **kwargs)
31it [10:36, 18.48s/it]

Saved 11734 rows.

Read file 31.


  s = pd.Series(data, index=index, name=name, **kwargs)
32it [11:00, 20.20s/it]

Saved 11493 rows.

Read file 32.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
33it [11:27, 22.13s/it]

Saved 15640 rows.

Read file 33.


  s = pd.Series(data, index=index, name=name, **kwargs)
34it [11:58, 25.00s/it]

Saved 11469 rows.

Read file 34.


  s = pd.Series(data, index=index, name=name, **kwargs)
35it [12:16, 22.95s/it]

Saved 11415 rows.

Read file 35.


  s = pd.Series(data, index=index, name=name, **kwargs)
36it [12:47, 25.25s/it]

Saved 11061 rows.

Read file 36.


  s = pd.Series(data, index=index, name=name, **kwargs)
37it [13:11, 24.70s/it]

Saved 11501 rows.

Read file 37.


  s = pd.Series(data, index=index, name=name, **kwargs)
38it [13:40, 26.20s/it]

Saved 11178 rows.

Read file 38.


  s = pd.Series(data, index=index, name=name, **kwargs)
39it [14:10, 27.32s/it]

Saved 11288 rows.

Read file 39.


  s = pd.Series(data, index=index, name=name, **kwargs)
40it [14:27, 24.11s/it]

Saved 11486 rows.

Read file 40.


  s = pd.Series(data, index=index, name=name, **kwargs)
41it [14:44, 22.14s/it]

Saved 11557 rows.

Read file 41.


  s = pd.Series(data, index=index, name=name, **kwargs)
42it [15:01, 20.60s/it]

Saved 11357 rows.

Read file 42.


  s = pd.Series(data, index=index, name=name, **kwargs)
43it [15:17, 19.18s/it]

Saved 11396 rows.

Read file 43.


  s = pd.Series(data, index=index, name=name, **kwargs)
44it [15:33, 18.14s/it]

Saved 11232 rows.

Read file 44.


  s = pd.Series(data, index=index, name=name, **kwargs)
45it [15:51, 18.19s/it]

Saved 11250 rows.

Read file 45.


  s = pd.Series(data, index=index, name=name, **kwargs)
46it [16:09, 17.96s/it]

Saved 11294 rows.

Read file 46.


  s = pd.Series(data, index=index, name=name, **kwargs)
47it [16:25, 17.43s/it]

Saved 11370 rows.

Read file 47.


  s = pd.Series(data, index=index, name=name, **kwargs)
48it [16:43, 17.68s/it]

Saved 11346 rows.

Read file 48.


  s = pd.Series(data, index=index, name=name, **kwargs)
49it [17:01, 17.65s/it]

Saved 11285 rows.

Read file 49.


  s = pd.Series(data, index=index, name=name, **kwargs)
50it [17:22, 18.61s/it]

Saved 11282 rows.

Read file 50.


  s = pd.Series(data, index=index, name=name, **kwargs)
51it [17:38, 18.01s/it]

Saved 11149 rows.

Read file 51.


  s = pd.Series(data, index=index, name=name, **kwargs)
52it [17:56, 18.01s/it]

Saved 11305 rows.

Read file 52.


  s = pd.Series(data, index=index, name=name, **kwargs)
53it [18:16, 18.54s/it]

Saved 11415 rows.

Read file 53.


  s = pd.Series(data, index=index, name=name, **kwargs)
54it [18:37, 19.15s/it]

Saved 11390 rows.

Read file 54.


  s = pd.Series(data, index=index, name=name, **kwargs)
55it [18:56, 19.35s/it]

Saved 11700 rows.

Read file 55.


  s = pd.Series(data, index=index, name=name, **kwargs)
56it [19:15, 19.01s/it]

Saved 11309 rows.

Read file 56.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
57it [20:11, 30.17s/it]

Saved 16304 rows.

Read file 57.


  s = pd.Series(data, index=index, name=name, **kwargs)
58it [20:31, 27.07s/it]

Saved 11501 rows.

Read file 58.


  s = pd.Series(data, index=index, name=name, **kwargs)
59it [20:48, 24.15s/it]

Saved 11431 rows.

Read file 59.


  s = pd.Series(data, index=index, name=name, **kwargs)
60it [21:06, 22.30s/it]

Saved 11089 rows.

Read file 60.


  s = pd.Series(data, index=index, name=name, **kwargs)
61it [21:24, 21.12s/it]

Saved 11508 rows.

Read file 61.


  s = pd.Series(data, index=index, name=name, **kwargs)
62it [21:43, 20.40s/it]

Saved 11211 rows.

Read file 62.


  s = pd.Series(data, index=index, name=name, **kwargs)
63it [22:02, 20.10s/it]

Saved 11280 rows.

Read file 63.


  s = pd.Series(data, index=index, name=name, **kwargs)
64it [22:21, 19.71s/it]

Saved 11322 rows.

Read file 64.


  s = pd.Series(data, index=index, name=name, **kwargs)
65it [22:43, 20.36s/it]

Saved 11547 rows.

Read file 65.


  s = pd.Series(data, index=index, name=name, **kwargs)
66it [23:01, 19.71s/it]

Saved 11346 rows.

Read file 66.


  s = pd.Series(data, index=index, name=name, **kwargs)
67it [23:22, 19.97s/it]

Saved 11336 rows.

Read file 67.


  s = pd.Series(data, index=index, name=name, **kwargs)
68it [23:39, 19.21s/it]

Saved 11255 rows.

Read file 68.


  s = pd.Series(data, index=index, name=name, **kwargs)
69it [23:59, 19.22s/it]

Saved 11247 rows.

Read file 69.


  s = pd.Series(data, index=index, name=name, **kwargs)
70it [24:24, 21.10s/it]

Saved 11114 rows.

Read file 70.


  s = pd.Series(data, index=index, name=name, **kwargs)
71it [24:45, 21.14s/it]

Saved 11407 rows.

Read file 71.


  s = pd.Series(data, index=index, name=name, **kwargs)
72it [25:04, 20.41s/it]

Saved 11326 rows.

Read file 72.


  s = pd.Series(data, index=index, name=name, **kwargs)
73it [25:24, 20.28s/it]

Saved 11345 rows.

Read file 73.


  s = pd.Series(data, index=index, name=name, **kwargs)
74it [25:42, 19.68s/it]

Saved 11209 rows.

Read file 74.


  s = pd.Series(data, index=index, name=name, **kwargs)
75it [26:12, 22.85s/it]

Saved 11306 rows.

Read file 75.


  s = pd.Series(data, index=index, name=name, **kwargs)
76it [26:31, 21.46s/it]

Saved 11349 rows.

Read file 76.


  s = pd.Series(data, index=index, name=name, **kwargs)
77it [26:50, 20.84s/it]

Saved 11380 rows.

Read file 77.


  s = pd.Series(data, index=index, name=name, **kwargs)
78it [27:13, 21.54s/it]

Saved 11310 rows.

Read file 78.


  s = pd.Series(data, index=index, name=name, **kwargs)
79it [27:35, 21.75s/it]

Saved 11624 rows.

Read file 79.


  s = pd.Series(data, index=index, name=name, **kwargs)
80it [28:00, 22.68s/it]

Saved 11424 rows.

Read file 80.


  s = pd.Series(data, index=index, name=name, **kwargs)
81it [28:27, 24.00s/it]

CBG length incorrect: CA:59150078
Saved 14331 rows.

Read file 81.


  s = pd.Series(data, index=index, name=name, **kwargs)
82it [28:48, 22.92s/it]

Saved 11363 rows.

Read file 82.


  s = pd.Series(data, index=index, name=name, **kwargs)
83it [29:16, 24.56s/it]

Saved 11400 rows.

Read file 83.


  s = pd.Series(data, index=index, name=name, **kwargs)
84it [29:40, 24.46s/it]

Saved 11117 rows.

Read file 84.


  s = pd.Series(data, index=index, name=name, **kwargs)
85it [29:59, 22.80s/it]

Saved 11402 rows.

Read file 85.


  s = pd.Series(data, index=index, name=name, **kwargs)
86it [30:54, 32.33s/it]

Saved 11181 rows.

Read file 86.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
87it [31:11, 27.68s/it]

Saved 11240 rows.

Read file 87.


  s = pd.Series(data, index=index, name=name, **kwargs)
88it [31:28, 24.53s/it]

Saved 11399 rows.

Read file 88.


  s = pd.Series(data, index=index, name=name, **kwargs)
89it [31:46, 22.61s/it]

Saved 11490 rows.

Read file 89.


  s = pd.Series(data, index=index, name=name, **kwargs)
90it [32:03, 21.02s/it]

Saved 11317 rows.

Read file 90.


  s = pd.Series(data, index=index, name=name, **kwargs)
91it [32:20, 19.75s/it]

Saved 11494 rows.

Read file 91.


  s = pd.Series(data, index=index, name=name, **kwargs)
92it [32:38, 19.07s/it]

Saved 11366 rows.

Read file 92.


  s = pd.Series(data, index=index, name=name, **kwargs)
93it [32:56, 18.98s/it]

Saved 11297 rows.

Read file 93.


  s = pd.Series(data, index=index, name=name, **kwargs)
94it [33:12, 18.02s/it]

Saved 11263 rows.

Read file 94.


  s = pd.Series(data, index=index, name=name, **kwargs)
95it [33:34, 19.06s/it]

Saved 11341 rows.

Read file 95.


  s = pd.Series(data, index=index, name=name, **kwargs)
96it [33:53, 21.18s/it]
0it [00:00, ?it/s]

Saved 11431 rows.
--------done: month 07--------

Read file 0.


  s = pd.Series(data, index=index, name=name, **kwargs)
1it [00:17, 17.20s/it]

Saved 11563 rows.

Read file 1.


  s = pd.Series(data, index=index, name=name, **kwargs)
2it [00:35, 17.86s/it]

Saved 11560 rows.

Read file 2.


  s = pd.Series(data, index=index, name=name, **kwargs)
3it [00:53, 17.71s/it]

Saved 11417 rows.

Read file 3.


  s = pd.Series(data, index=index, name=name, **kwargs)
4it [01:11, 17.90s/it]

Saved 11582 rows.

Read file 4.


  s = pd.Series(data, index=index, name=name, **kwargs)
5it [01:31, 18.60s/it]

Saved 11499 rows.

Read file 5.


  s = pd.Series(data, index=index, name=name, **kwargs)
6it [01:51, 19.05s/it]

Saved 11483 rows.

Read file 6.


  s = pd.Series(data, index=index, name=name, **kwargs)
7it [02:19, 22.03s/it]

Saved 11869 rows.

Read file 7.


  s = pd.Series(data, index=index, name=name, **kwargs)
8it [02:36, 20.43s/it]

Saved 11601 rows.

Read file 8.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
9it [03:06, 23.61s/it]

Saved 18413 rows.

Read file 9.


  s = pd.Series(data, index=index, name=name, **kwargs)
10it [03:23, 21.59s/it]

Saved 11834 rows.

Read file 10.


  s = pd.Series(data, index=index, name=name, **kwargs)
11it [03:40, 20.19s/it]

Saved 11542 rows.

Read file 11.


  s = pd.Series(data, index=index, name=name, **kwargs)
12it [03:58, 19.32s/it]

Saved 11460 rows.

Read file 12.


  s = pd.Series(data, index=index, name=name, **kwargs)
13it [04:15, 18.72s/it]

Saved 11584 rows.

Read file 13.


  s = pd.Series(data, index=index, name=name, **kwargs)
14it [04:35, 19.13s/it]

Saved 11365 rows.

Read file 14.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
15it [05:05, 22.45s/it]

Saved 11589 rows.

Read file 15.


  s = pd.Series(data, index=index, name=name, **kwargs)
16it [05:23, 20.99s/it]

Saved 11658 rows.

Read file 16.


  s = pd.Series(data, index=index, name=name, **kwargs)
17it [05:41, 20.08s/it]

Saved 11737 rows.

Read file 17.


  s = pd.Series(data, index=index, name=name, **kwargs)
18it [05:58, 19.17s/it]

Saved 11789 rows.

Read file 18.


  s = pd.Series(data, index=index, name=name, **kwargs)
19it [06:16, 18.98s/it]

Saved 11659 rows.

Read file 19.


  s = pd.Series(data, index=index, name=name, **kwargs)
20it [06:34, 18.52s/it]

Saved 11622 rows.

Read file 20.


  s = pd.Series(data, index=index, name=name, **kwargs)
21it [06:53, 18.63s/it]

Saved 11513 rows.

Read file 21.


  s = pd.Series(data, index=index, name=name, **kwargs)
22it [07:11, 18.47s/it]

Saved 11546 rows.

Read file 22.


  s = pd.Series(data, index=index, name=name, **kwargs)
23it [07:42, 22.29s/it]

Saved 11477 rows.

Read file 23.


  s = pd.Series(data, index=index, name=name, **kwargs)
24it [08:10, 23.96s/it]

Saved 11556 rows.

Read file 24.


  s = pd.Series(data, index=index, name=name, **kwargs)
25it [08:30, 22.78s/it]

Saved 11356 rows.

Read file 25.


  s = pd.Series(data, index=index, name=name, **kwargs)
26it [08:49, 21.53s/it]

Saved 11425 rows.

Read file 26.


  s = pd.Series(data, index=index, name=name, **kwargs)
27it [09:13, 22.35s/it]

Saved 11470 rows.

Read file 27.


  s = pd.Series(data, index=index, name=name, **kwargs)
28it [09:39, 23.57s/it]

Saved 11265 rows.

Read file 28.


  s = pd.Series(data, index=index, name=name, **kwargs)
29it [09:57, 21.84s/it]

Saved 11528 rows.

Read file 29.


  s = pd.Series(data, index=index, name=name, **kwargs)
30it [10:16, 20.88s/it]

Saved 11385 rows.

Read file 30.


  s = pd.Series(data, index=index, name=name, **kwargs)
31it [10:34, 20.13s/it]

Saved 11778 rows.

Read file 31.


  s = pd.Series(data, index=index, name=name, **kwargs)
32it [10:55, 20.48s/it]

Saved 11549 rows.

Read file 32.


  s = pd.Series(data, index=index, name=name, **kwargs)
33it [11:21, 21.93s/it]

CBG length incorrect: CA:59150078
Saved 15540 rows.

Read file 33.


  s = pd.Series(data, index=index, name=name, **kwargs)
34it [11:38, 20.44s/it]

Saved 11557 rows.

Read file 34.


  s = pd.Series(data, index=index, name=name, **kwargs)
35it [11:56, 19.70s/it]

Saved 11572 rows.

Read file 35.


  s = pd.Series(data, index=index, name=name, **kwargs)
36it [12:15, 19.67s/it]

Saved 11292 rows.

Read file 36.


  s = pd.Series(data, index=index, name=name, **kwargs)
37it [12:38, 20.62s/it]

Saved 11594 rows.

Read file 37.


  s = pd.Series(data, index=index, name=name, **kwargs)
38it [12:56, 19.88s/it]

Saved 11300 rows.

Read file 38.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
39it [13:15, 19.46s/it]

Saved 11455 rows.

Read file 39.


  s = pd.Series(data, index=index, name=name, **kwargs)
40it [13:32, 18.84s/it]

Saved 11553 rows.

Read file 40.


  s = pd.Series(data, index=index, name=name, **kwargs)
41it [13:50, 18.72s/it]

Saved 11713 rows.

Read file 41.


  s = pd.Series(data, index=index, name=name, **kwargs)
42it [14:09, 18.67s/it]

Saved 11441 rows.

Read file 42.


  s = pd.Series(data, index=index, name=name, **kwargs)
43it [14:28, 18.70s/it]

Saved 11469 rows.

Read file 43.


  s = pd.Series(data, index=index, name=name, **kwargs)
44it [14:46, 18.61s/it]

Saved 11301 rows.

Read file 44.


  s = pd.Series(data, index=index, name=name, **kwargs)
45it [15:04, 18.43s/it]

Saved 11298 rows.

Read file 45.


  s = pd.Series(data, index=index, name=name, **kwargs)
46it [15:24, 18.77s/it]

Saved 11463 rows.

Read file 46.


  s = pd.Series(data, index=index, name=name, **kwargs)
47it [15:43, 18.93s/it]

Saved 11504 rows.

Read file 47.


  s = pd.Series(data, index=index, name=name, **kwargs)
48it [16:02, 19.01s/it]

Saved 11438 rows.

Read file 48.


  s = pd.Series(data, index=index, name=name, **kwargs)
49it [16:20, 18.52s/it]

Saved 11356 rows.

Read file 49.


  s = pd.Series(data, index=index, name=name, **kwargs)
50it [16:50, 22.09s/it]

Saved 11337 rows.

Read file 50.


  s = pd.Series(data, index=index, name=name, **kwargs)
51it [17:08, 20.93s/it]

Saved 11379 rows.

Read file 51.


  s = pd.Series(data, index=index, name=name, **kwargs)
52it [17:26, 20.02s/it]

Saved 11315 rows.

Read file 52.


  s = pd.Series(data, index=index, name=name, **kwargs)
53it [17:46, 20.07s/it]

Saved 11518 rows.

Read file 53.


  s = pd.Series(data, index=index, name=name, **kwargs)
54it [18:07, 20.22s/it]

Saved 11432 rows.

Read file 54.


  s = pd.Series(data, index=index, name=name, **kwargs)
55it [18:27, 20.11s/it]

Saved 11777 rows.

Read file 55.


  s = pd.Series(data, index=index, name=name, **kwargs)
56it [18:45, 19.63s/it]

Saved 11408 rows.

Read file 56.


  s = pd.Series(data, index=index, name=name, **kwargs)
57it [19:11, 21.57s/it]

CBG length incorrect: CA:59150078
Saved 16243 rows.

Read file 57.


  s = pd.Series(data, index=index, name=name, **kwargs)
58it [19:32, 21.40s/it]

Saved 11516 rows.

Read file 58.


  s = pd.Series(data, index=index, name=name, **kwargs)
59it [19:52, 20.94s/it]

Saved 11499 rows.

Read file 59.


  s = pd.Series(data, index=index, name=name, **kwargs)
60it [20:11, 20.42s/it]

Saved 11135 rows.

Read file 60.


  s = pd.Series(data, index=index, name=name, **kwargs)
61it [20:31, 20.18s/it]

Saved 11601 rows.

Read file 61.


  s = pd.Series(data, index=index, name=name, **kwargs)
62it [20:51, 20.16s/it]

Saved 11300 rows.

Read file 62.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
63it [21:10, 19.79s/it]

Saved 11287 rows.

Read file 63.


  s = pd.Series(data, index=index, name=name, **kwargs)
64it [21:28, 19.18s/it]

Saved 11356 rows.

Read file 64.


  s = pd.Series(data, index=index, name=name, **kwargs)
65it [21:46, 18.91s/it]

Saved 11681 rows.

Read file 65.


  s = pd.Series(data, index=index, name=name, **kwargs)
66it [22:07, 19.46s/it]

Saved 11442 rows.

Read file 66.


  s = pd.Series(data, index=index, name=name, **kwargs)
67it [22:28, 20.05s/it]

Saved 11476 rows.

Read file 67.


  s = pd.Series(data, index=index, name=name, **kwargs)
68it [22:50, 20.43s/it]

Saved 11262 rows.

Read file 68.


  s = pd.Series(data, index=index, name=name, **kwargs)
69it [23:20, 23.29s/it]

Saved 11311 rows.

Read file 69.


  s = pd.Series(data, index=index, name=name, **kwargs)
70it [23:37, 21.59s/it]

Saved 11248 rows.

Read file 70.


  s = pd.Series(data, index=index, name=name, **kwargs)
71it [23:58, 21.26s/it]

Saved 11472 rows.

Read file 71.


  s = pd.Series(data, index=index, name=name, **kwargs)
72it [24:16, 20.35s/it]

Saved 11361 rows.

Read file 72.


  s = pd.Series(data, index=index, name=name, **kwargs)
73it [24:36, 20.38s/it]

Saved 11370 rows.

Read file 73.


  s = pd.Series(data, index=index, name=name, **kwargs)
74it [24:53, 19.36s/it]

Saved 11243 rows.

Read file 74.


  s = pd.Series(data, index=index, name=name, **kwargs)
75it [25:12, 19.08s/it]

Saved 11256 rows.

Read file 75.


  s = pd.Series(data, index=index, name=name, **kwargs)
76it [25:30, 18.69s/it]

Saved 11334 rows.

Read file 76.


  s = pd.Series(data, index=index, name=name, **kwargs)
77it [26:00, 22.31s/it]

Saved 11539 rows.

Read file 77.


  s = pd.Series(data, index=index, name=name, **kwargs)
78it [26:18, 21.04s/it]

Saved 11444 rows.

Read file 78.


  s = pd.Series(data, index=index, name=name, **kwargs)
79it [26:36, 19.91s/it]

Saved 11765 rows.

Read file 79.


  s = pd.Series(data, index=index, name=name, **kwargs)
80it [26:54, 19.34s/it]

Saved 11420 rows.

Read file 80.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
81it [27:21, 21.71s/it]

Saved 17167 rows.

Read file 81.


  s = pd.Series(data, index=index, name=name, **kwargs)
82it [27:47, 22.96s/it]

Saved 11661 rows.

Read file 82.


  s = pd.Series(data, index=index, name=name, **kwargs)
83it [28:05, 21.60s/it]

Saved 11425 rows.

Read file 83.


  s = pd.Series(data, index=index, name=name, **kwargs)
84it [28:25, 20.98s/it]

Saved 11177 rows.

Read file 84.


  s = pd.Series(data, index=index, name=name, **kwargs)
85it [28:44, 20.47s/it]

Saved 11372 rows.

Read file 85.


  s = pd.Series(data, index=index, name=name, **kwargs)
86it [29:03, 19.88s/it]

Saved 11136 rows.

Read file 86.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
87it [29:27, 21.25s/it]

Saved 11399 rows.

Read file 87.


  s = pd.Series(data, index=index, name=name, **kwargs)
88it [29:45, 20.28s/it]

Saved 11411 rows.

Read file 88.


  s = pd.Series(data, index=index, name=name, **kwargs)
89it [30:05, 20.10s/it]

Saved 11495 rows.

Read file 89.


  s = pd.Series(data, index=index, name=name, **kwargs)
90it [30:24, 19.95s/it]

Saved 11511 rows.

Read file 90.


  s = pd.Series(data, index=index, name=name, **kwargs)
91it [30:43, 19.47s/it]

Saved 11376 rows.

Read file 91.


  s = pd.Series(data, index=index, name=name, **kwargs)
92it [31:11, 22.25s/it]

Saved 11320 rows.

Read file 92.


  s = pd.Series(data, index=index, name=name, **kwargs)
93it [31:30, 21.12s/it]

Saved 11373 rows.

Read file 93.


  s = pd.Series(data, index=index, name=name, **kwargs)
94it [31:48, 20.31s/it]

Saved 11244 rows.

Read file 94.


  s = pd.Series(data, index=index, name=name, **kwargs)
95it [32:06, 19.60s/it]

Saved 11495 rows.

Read file 95.


  s = pd.Series(data, index=index, name=name, **kwargs)
96it [32:32, 21.45s/it]

Saved 11438 rows.

Read file 96.


  s = pd.Series(data, index=index, name=name, **kwargs)
97it [32:51, 20.80s/it]

Saved 11337 rows.

Read file 97.


  s = pd.Series(data, index=index, name=name, **kwargs)
98it [33:20, 23.23s/it]

Saved 11278 rows.

Read file 98.


  s = pd.Series(data, index=index, name=name, **kwargs)
99it [33:37, 21.45s/it]

Saved 11238 rows.

Read file 99.


  s = pd.Series(data, index=index, name=name, **kwargs)
100it [33:56, 20.51s/it]

Saved 11286 rows.

Read file 100.


  s = pd.Series(data, index=index, name=name, **kwargs)
101it [34:27, 23.72s/it]

Saved 11508 rows.

Read file 101.


  s = pd.Series(data, index=index, name=name, **kwargs)
102it [35:02, 27.03s/it]

Saved 11482 rows.

Read file 102.


  s = pd.Series(data, index=index, name=name, **kwargs)
103it [35:26, 26.24s/it]

Saved 11769 rows.

Read file 103.


  s = pd.Series(data, index=index, name=name, **kwargs)
104it [35:44, 23.80s/it]

Saved 11444 rows.

Read file 104.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
105it [36:16, 26.13s/it]

Saved 17138 rows.

Read file 105.


  s = pd.Series(data, index=index, name=name, **kwargs)
106it [36:36, 24.27s/it]

Saved 11712 rows.

Read file 106.


  s = pd.Series(data, index=index, name=name, **kwargs)
107it [36:55, 22.86s/it]

Saved 11451 rows.

Read file 107.


  s = pd.Series(data, index=index, name=name, **kwargs)
108it [37:13, 21.42s/it]

Saved 11174 rows.

Read file 108.


  s = pd.Series(data, index=index, name=name, **kwargs)
109it [37:36, 21.88s/it]

Saved 11424 rows.

Read file 109.


  s = pd.Series(data, index=index, name=name, **kwargs)
110it [38:03, 23.29s/it]

Saved 11151 rows.

Read file 110.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
111it [38:21, 21.72s/it]

Saved 11455 rows.

Read file 111.


  s = pd.Series(data, index=index, name=name, **kwargs)
112it [38:51, 24.18s/it]

Saved 11387 rows.

Read file 112.


  s = pd.Series(data, index=index, name=name, **kwargs)
113it [39:11, 22.99s/it]

Saved 11478 rows.

Read file 113.


  s = pd.Series(data, index=index, name=name, **kwargs)
114it [39:31, 22.04s/it]

Saved 11523 rows.

Read file 114.


  s = pd.Series(data, index=index, name=name, **kwargs)
115it [39:50, 21.24s/it]

Saved 11399 rows.

Read file 115.


  s = pd.Series(data, index=index, name=name, **kwargs)
116it [40:23, 24.54s/it]

Saved 11306 rows.

Read file 116.


  s = pd.Series(data, index=index, name=name, **kwargs)
117it [40:43, 23.28s/it]

Saved 11356 rows.

Read file 117.


  s = pd.Series(data, index=index, name=name, **kwargs)
118it [41:04, 22.53s/it]

Saved 11242 rows.

Read file 118.


  s = pd.Series(data, index=index, name=name, **kwargs)
119it [41:44, 27.79s/it]

Saved 11512 rows.

Read file 119.


  s = pd.Series(data, index=index, name=name, **kwargs)
120it [42:07, 21.06s/it]
0it [00:00, ?it/s]

Saved 11390 rows.
--------done: month 08--------

Read file 0.


  s = pd.Series(data, index=index, name=name, **kwargs)
1it [00:21, 21.56s/it]

Saved 11866 rows.

Read file 1.


  s = pd.Series(data, index=index, name=name, **kwargs)
2it [00:52, 27.34s/it]

Saved 11757 rows.

Read file 2.


  s = pd.Series(data, index=index, name=name, **kwargs)
3it [01:11, 23.34s/it]

Saved 11681 rows.

Read file 3.


  s = pd.Series(data, index=index, name=name, **kwargs)
4it [01:29, 21.19s/it]

Saved 11825 rows.

Read file 4.


  s = pd.Series(data, index=index, name=name, **kwargs)
5it [01:51, 21.33s/it]

Saved 11808 rows.

Read file 5.


  s = pd.Series(data, index=index, name=name, **kwargs)
6it [02:21, 24.61s/it]

Saved 11801 rows.

Read file 6.


  s = pd.Series(data, index=index, name=name, **kwargs)
7it [02:40, 22.70s/it]

Saved 12127 rows.

Read file 7.


  s = pd.Series(data, index=index, name=name, **kwargs)
8it [03:10, 25.01s/it]

Saved 11908 rows.

Read file 8.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
9it [03:52, 30.30s/it]

Saved 18761 rows.

Read file 9.


  s = pd.Series(data, index=index, name=name, **kwargs)
10it [04:12, 27.23s/it]

Saved 12113 rows.

Read file 10.


  s = pd.Series(data, index=index, name=name, **kwargs)
11it [04:31, 24.67s/it]

Saved 11742 rows.

Read file 11.


  s = pd.Series(data, index=index, name=name, **kwargs)
12it [04:50, 22.80s/it]

Saved 11642 rows.

Read file 12.


  s = pd.Series(data, index=index, name=name, **kwargs)
13it [05:09, 21.62s/it]

Saved 11901 rows.

Read file 13.


  s = pd.Series(data, index=index, name=name, **kwargs)
14it [05:27, 20.65s/it]

Saved 11562 rows.

Read file 14.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
15it [05:48, 20.83s/it]

Saved 11871 rows.

Read file 15.


  s = pd.Series(data, index=index, name=name, **kwargs)
16it [06:06, 19.99s/it]

Saved 11902 rows.

Read file 16.


  s = pd.Series(data, index=index, name=name, **kwargs)
17it [06:26, 19.82s/it]

Saved 11950 rows.

Read file 17.


  s = pd.Series(data, index=index, name=name, **kwargs)
18it [06:45, 19.65s/it]

Saved 12023 rows.

Read file 18.


  s = pd.Series(data, index=index, name=name, **kwargs)
19it [07:04, 19.40s/it]

Saved 11821 rows.

Read file 19.


  s = pd.Series(data, index=index, name=name, **kwargs)
20it [07:27, 20.52s/it]

Saved 11860 rows.

Read file 20.


  s = pd.Series(data, index=index, name=name, **kwargs)
21it [08:13, 28.15s/it]

Saved 11740 rows.

Read file 21.


  s = pd.Series(data, index=index, name=name, **kwargs)
22it [08:34, 25.89s/it]

Saved 11800 rows.

Read file 22.


  s = pd.Series(data, index=index, name=name, **kwargs)
23it [08:54, 24.22s/it]

Saved 11794 rows.

Read file 23.


  s = pd.Series(data, index=index, name=name, **kwargs)
24it [09:13, 22.51s/it]

Saved 11905 rows.

Read file 24.


  s = pd.Series(data, index=index, name=name, **kwargs)
25it [09:34, 22.27s/it]

Saved 11524 rows.

Read file 25.


  s = pd.Series(data, index=index, name=name, **kwargs)
26it [09:54, 21.42s/it]

Saved 11442 rows.

Read file 26.


  s = pd.Series(data, index=index, name=name, **kwargs)
27it [10:13, 20.80s/it]

Saved 11455 rows.

Read file 27.


  s = pd.Series(data, index=index, name=name, **kwargs)
28it [10:33, 20.42s/it]

Saved 11550 rows.

Read file 28.


  s = pd.Series(data, index=index, name=name, **kwargs)
29it [10:52, 20.04s/it]

Saved 11560 rows.

Read file 29.


  s = pd.Series(data, index=index, name=name, **kwargs)
30it [11:11, 19.94s/it]

Saved 11496 rows.

Read file 30.


  s = pd.Series(data, index=index, name=name, **kwargs)
31it [11:42, 23.14s/it]

Saved 11997 rows.

Read file 31.


  s = pd.Series(data, index=index, name=name, **kwargs)
32it [12:12, 25.10s/it]

Saved 11551 rows.

Read file 32.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
33it [12:45, 27.45s/it]

Saved 18122 rows.

Read file 33.


  s = pd.Series(data, index=index, name=name, **kwargs)
34it [13:04, 24.89s/it]

Saved 11818 rows.

Read file 34.


  s = pd.Series(data, index=index, name=name, **kwargs)
35it [13:23, 23.19s/it]

Saved 11496 rows.

Read file 35.


  s = pd.Series(data, index=index, name=name, **kwargs)
36it [13:41, 21.78s/it]

Saved 11385 rows.

Read file 36.


  s = pd.Series(data, index=index, name=name, **kwargs)
37it [14:01, 21.30s/it]

Saved 11624 rows.

Read file 37.


  s = pd.Series(data, index=index, name=name, **kwargs)
38it [14:22, 21.03s/it]

Saved 11319 rows.

Read file 38.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
39it [14:41, 20.49s/it]

Saved 11588 rows.

Read file 39.


  s = pd.Series(data, index=index, name=name, **kwargs)
40it [15:01, 20.20s/it]

Saved 11583 rows.

Read file 40.


  s = pd.Series(data, index=index, name=name, **kwargs)
41it [15:20, 19.88s/it]

Saved 11677 rows.

Read file 41.


  s = pd.Series(data, index=index, name=name, **kwargs)
42it [15:39, 19.66s/it]

Saved 11728 rows.

Read file 42.


  s = pd.Series(data, index=index, name=name, **kwargs)
43it [15:58, 19.57s/it]

Saved 11661 rows.

Read file 43.


  s = pd.Series(data, index=index, name=name, **kwargs)
44it [16:18, 19.49s/it]

Saved 11503 rows.

Read file 44.


  s = pd.Series(data, index=index, name=name, **kwargs)
45it [16:37, 19.47s/it]

Saved 11519 rows.

Read file 45.


  s = pd.Series(data, index=index, name=name, **kwargs)
46it [16:55, 19.14s/it]

Saved 11480 rows.

Read file 46.


  s = pd.Series(data, index=index, name=name, **kwargs)
47it [17:13, 18.81s/it]

Saved 11511 rows.

Read file 47.


  s = pd.Series(data, index=index, name=name, **kwargs)
48it [17:33, 18.97s/it]

Saved 11599 rows.

Read file 48.


  s = pd.Series(data, index=index, name=name, **kwargs)
49it [18:15, 26.04s/it]

Saved 11569 rows.

Read file 49.


  s = pd.Series(data, index=index, name=name, **kwargs)
50it [18:46, 27.55s/it]

Saved 11564 rows.

Read file 50.


  s = pd.Series(data, index=index, name=name, **kwargs)
51it [19:27, 31.35s/it]

Saved 11501 rows.

Read file 51.


  s = pd.Series(data, index=index, name=name, **kwargs)
52it [19:48, 28.50s/it]

Saved 11521 rows.

Read file 52.


  s = pd.Series(data, index=index, name=name, **kwargs)
53it [20:07, 25.54s/it]

Saved 11750 rows.

Read file 53.


  s = pd.Series(data, index=index, name=name, **kwargs)
54it [20:39, 27.42s/it]

Saved 11617 rows.

Read file 54.


  s = pd.Series(data, index=index, name=name, **kwargs)
55it [20:59, 25.28s/it]

Saved 12007 rows.

Read file 55.


  s = pd.Series(data, index=index, name=name, **kwargs)
56it [21:18, 23.37s/it]

Saved 11636 rows.

Read file 56.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
57it [21:45, 24.52s/it]

Saved 16772 rows.

Read file 57.


  s = pd.Series(data, index=index, name=name, **kwargs)
58it [22:06, 23.52s/it]

Saved 11774 rows.

Read file 58.


  s = pd.Series(data, index=index, name=name, **kwargs)
59it [22:29, 23.30s/it]

Saved 11681 rows.

Read file 59.


  s = pd.Series(data, index=index, name=name, **kwargs)
60it [23:04, 26.90s/it]

Saved 11401 rows.

Read file 60.


  s = pd.Series(data, index=index, name=name, **kwargs)
61it [23:24, 24.65s/it]

Saved 11841 rows.

Read file 61.


  s = pd.Series(data, index=index, name=name, **kwargs)
62it [23:43, 23.03s/it]

Saved 11453 rows.

Read file 62.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
63it [24:02, 21.85s/it]

Saved 11629 rows.

Read file 63.


  s = pd.Series(data, index=index, name=name, **kwargs)
64it [24:21, 20.85s/it]

Saved 11618 rows.

Read file 64.


  s = pd.Series(data, index=index, name=name, **kwargs)
65it [24:41, 20.58s/it]

Saved 11858 rows.

Read file 65.


  s = pd.Series(data, index=index, name=name, **kwargs)
66it [25:01, 20.48s/it]

Saved 11722 rows.

Read file 66.


  s = pd.Series(data, index=index, name=name, **kwargs)
67it [25:26, 21.83s/it]

Saved 11644 rows.

Read file 67.


  s = pd.Series(data, index=index, name=name, **kwargs)
68it [25:45, 21.00s/it]

Saved 11492 rows.

Read file 68.


  s = pd.Series(data, index=index, name=name, **kwargs)
69it [26:03, 20.06s/it]

Saved 11609 rows.

Read file 69.


  s = pd.Series(data, index=index, name=name, **kwargs)
70it [26:35, 23.54s/it]

Saved 11481 rows.

Read file 70.


  s = pd.Series(data, index=index, name=name, **kwargs)
71it [27:08, 26.51s/it]

Saved 11748 rows.

Read file 71.


  s = pd.Series(data, index=index, name=name, **kwargs)
72it [27:27, 24.28s/it]

Saved 11583 rows.

Read file 72.


  s = pd.Series(data, index=index, name=name, **kwargs)
73it [27:58, 26.32s/it]

Saved 11530 rows.

Read file 73.


  s = pd.Series(data, index=index, name=name, **kwargs)
74it [28:19, 24.76s/it]

Saved 11524 rows.

Read file 74.


  s = pd.Series(data, index=index, name=name, **kwargs)
75it [28:38, 22.92s/it]

Saved 11677 rows.

Read file 75.


  s = pd.Series(data, index=index, name=name, **kwargs)
76it [28:57, 21.90s/it]

Saved 11489 rows.

Read file 76.


  s = pd.Series(data, index=index, name=name, **kwargs)
77it [29:16, 20.84s/it]

Saved 11630 rows.

Read file 77.


  s = pd.Series(data, index=index, name=name, **kwargs)
78it [29:34, 20.07s/it]

Saved 11555 rows.

Read file 78.


  s = pd.Series(data, index=index, name=name, **kwargs)
79it [29:52, 19.51s/it]

Saved 11972 rows.

Read file 79.


  s = pd.Series(data, index=index, name=name, **kwargs)
80it [30:10, 19.13s/it]

Saved 11698 rows.

Read file 80.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
81it [30:42, 22.84s/it]

Saved 15908 rows.

Read file 81.


  s = pd.Series(data, index=index, name=name, **kwargs)
82it [31:06, 23.09s/it]

Saved 11703 rows.

Read file 82.


  s = pd.Series(data, index=index, name=name, **kwargs)
83it [31:28, 22.86s/it]

Saved 11721 rows.

Read file 83.


  s = pd.Series(data, index=index, name=name, **kwargs)
84it [31:49, 22.24s/it]

Saved 11325 rows.

Read file 84.


  s = pd.Series(data, index=index, name=name, **kwargs)
85it [32:08, 21.31s/it]

Saved 11742 rows.

Read file 85.


  s = pd.Series(data, index=index, name=name, **kwargs)
86it [32:27, 20.66s/it]

Saved 11465 rows.

Read file 86.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
87it [32:50, 21.32s/it]

Saved 11570 rows.

Read file 87.


  s = pd.Series(data, index=index, name=name, **kwargs)
88it [33:11, 21.18s/it]

Saved 11659 rows.

Read file 88.


  s = pd.Series(data, index=index, name=name, **kwargs)
89it [33:29, 20.31s/it]

Saved 11856 rows.

Read file 89.


  s = pd.Series(data, index=index, name=name, **kwargs)
90it [33:51, 20.76s/it]

Saved 11639 rows.

Read file 90.


  s = pd.Series(data, index=index, name=name, **kwargs)
91it [34:09, 20.06s/it]

Saved 11647 rows.

Read file 91.


  s = pd.Series(data, index=index, name=name, **kwargs)
92it [34:35, 21.85s/it]

Saved 11421 rows.

Read file 92.


  s = pd.Series(data, index=index, name=name, **kwargs)
93it [35:22, 29.24s/it]

Saved 11458 rows.

Read file 93.


  s = pd.Series(data, index=index, name=name, **kwargs)
94it [35:55, 30.53s/it]

Saved 11515 rows.

Read file 94.


  s = pd.Series(data, index=index, name=name, **kwargs)
95it [36:31, 32.15s/it]

Saved 11694 rows.

Read file 95.


  s = pd.Series(data, index=index, name=name, **kwargs)
96it [36:51, 23.04s/it]
0it [00:00, ?it/s]

Saved 11552 rows.
--------done: month 09--------

Read file 0.


  s = pd.Series(data, index=index, name=name, **kwargs)
1it [00:29, 29.08s/it]

Saved 11877 rows.

Read file 1.


  s = pd.Series(data, index=index, name=name, **kwargs)
2it [00:51, 25.15s/it]

Saved 11699 rows.

Read file 2.


  s = pd.Series(data, index=index, name=name, **kwargs)
3it [01:09, 22.01s/it]

Saved 11755 rows.

Read file 3.


  s = pd.Series(data, index=index, name=name, **kwargs)
4it [01:29, 21.11s/it]

Saved 11870 rows.

Read file 4.


  s = pd.Series(data, index=index, name=name, **kwargs)
5it [01:47, 19.97s/it]

Saved 11756 rows.

Read file 5.


  s = pd.Series(data, index=index, name=name, **kwargs)
6it [02:06, 19.58s/it]

Saved 11774 rows.

Read file 6.


  s = pd.Series(data, index=index, name=name, **kwargs)
7it [02:30, 21.00s/it]

Saved 12097 rows.

Read file 7.


  s = pd.Series(data, index=index, name=name, **kwargs)
8it [02:57, 23.08s/it]

Saved 11889 rows.

Read file 8.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
9it [03:38, 28.76s/it]

Saved 18769 rows.

Read file 9.


  s = pd.Series(data, index=index, name=name, **kwargs)
10it [04:07, 28.75s/it]

Saved 12099 rows.

Read file 10.


  s = pd.Series(data, index=index, name=name, **kwargs)
11it [04:38, 29.23s/it]

Saved 11729 rows.

Read file 11.


  s = pd.Series(data, index=index, name=name, **kwargs)
12it [05:11, 30.43s/it]

Saved 11687 rows.

Read file 12.


  s = pd.Series(data, index=index, name=name, **kwargs)
13it [05:38, 29.38s/it]

Saved 11866 rows.

Read file 13.


  s = pd.Series(data, index=index, name=name, **kwargs)
14it [06:01, 27.69s/it]

Saved 11537 rows.

Read file 14.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
15it [06:34, 29.17s/it]

Saved 11803 rows.

Read file 15.


  s = pd.Series(data, index=index, name=name, **kwargs)
16it [07:00, 28.08s/it]

Saved 11896 rows.

Read file 16.


  s = pd.Series(data, index=index, name=name, **kwargs)
17it [07:27, 27.96s/it]

Saved 11975 rows.

Read file 17.


  s = pd.Series(data, index=index, name=name, **kwargs)
18it [07:49, 26.14s/it]

Saved 11985 rows.

Read file 18.


  s = pd.Series(data, index=index, name=name, **kwargs)
19it [08:07, 23.65s/it]

Saved 11874 rows.

Read file 19.


  s = pd.Series(data, index=index, name=name, **kwargs)
20it [08:39, 26.27s/it]

Saved 11810 rows.

Read file 20.


  s = pd.Series(data, index=index, name=name, **kwargs)
21it [08:59, 24.27s/it]

Saved 11744 rows.

Read file 21.


  s = pd.Series(data, index=index, name=name, **kwargs)
22it [09:17, 22.45s/it]

Saved 11727 rows.

Read file 22.


  s = pd.Series(data, index=index, name=name, **kwargs)
23it [09:54, 26.66s/it]

Saved 11734 rows.

Read file 23.


  s = pd.Series(data, index=index, name=name, **kwargs)
24it [10:18, 26.07s/it]

Saved 11814 rows.

Read file 24.


  s = pd.Series(data, index=index, name=name, **kwargs)
25it [10:42, 25.36s/it]

Saved 11833 rows.

Read file 25.


  s = pd.Series(data, index=index, name=name, **kwargs)
26it [11:06, 24.94s/it]

Saved 11773 rows.

Read file 26.


  s = pd.Series(data, index=index, name=name, **kwargs)
27it [11:33, 25.50s/it]

Saved 11698 rows.

Read file 27.


  s = pd.Series(data, index=index, name=name, **kwargs)
28it [11:57, 25.22s/it]

Saved 11831 rows.

Read file 28.


  s = pd.Series(data, index=index, name=name, **kwargs)
29it [12:19, 24.26s/it]

Saved 11766 rows.

Read file 29.


  s = pd.Series(data, index=index, name=name, **kwargs)
30it [12:47, 25.16s/it]

Saved 11781 rows.

Read file 30.


  s = pd.Series(data, index=index, name=name, **kwargs)
31it [13:18, 27.16s/it]

Saved 12117 rows.

Read file 31.


  s = pd.Series(data, index=index, name=name, **kwargs)
32it [13:39, 25.02s/it]

Saved 11880 rows.

Read file 32.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
33it [14:28, 32.26s/it]

Saved 18972 rows.

Read file 33.


  s = pd.Series(data, index=index, name=name, **kwargs)
34it [14:50, 29.43s/it]

Saved 12052 rows.

Read file 34.


  s = pd.Series(data, index=index, name=name, **kwargs)
35it [15:10, 26.40s/it]

Saved 11760 rows.

Read file 35.


  s = pd.Series(data, index=index, name=name, **kwargs)
36it [15:34, 25.81s/it]

Saved 11635 rows.

Read file 36.


  s = pd.Series(data, index=index, name=name, **kwargs)
37it [15:58, 25.19s/it]

Saved 11839 rows.

Read file 37.


  s = pd.Series(data, index=index, name=name, **kwargs)
38it [16:14, 22.53s/it]

Saved 11536 rows.

Read file 38.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
39it [16:46, 25.27s/it]

Saved 11874 rows.

Read file 39.


  s = pd.Series(data, index=index, name=name, **kwargs)
40it [17:07, 23.98s/it]

Saved 11823 rows.

Read file 40.


  s = pd.Series(data, index=index, name=name, **kwargs)
41it [17:26, 22.44s/it]

Saved 11920 rows.

Read file 41.


  s = pd.Series(data, index=index, name=name, **kwargs)
42it [17:43, 20.74s/it]

Saved 11824 rows.

Read file 42.


  s = pd.Series(data, index=index, name=name, **kwargs)
43it [17:59, 19.58s/it]

Saved 11795 rows.

Read file 43.


  s = pd.Series(data, index=index, name=name, **kwargs)
44it [18:26, 21.61s/it]

Saved 11816 rows.

Read file 44.


  s = pd.Series(data, index=index, name=name, **kwargs)
45it [18:43, 20.38s/it]

Saved 11638 rows.

Read file 45.


  s = pd.Series(data, index=index, name=name, **kwargs)
46it [19:12, 22.96s/it]

Saved 11703 rows.

Read file 46.


  s = pd.Series(data, index=index, name=name, **kwargs)
47it [19:36, 23.28s/it]

Saved 11706 rows.

Read file 47.


  s = pd.Series(data, index=index, name=name, **kwargs)
48it [20:07, 25.43s/it]

Saved 11833 rows.

Read file 48.


  s = pd.Series(data, index=index, name=name, **kwargs)
49it [20:33, 25.72s/it]

Saved 11821 rows.

Read file 49.


  s = pd.Series(data, index=index, name=name, **kwargs)
50it [21:08, 28.39s/it]

Saved 11704 rows.

Read file 50.


  s = pd.Series(data, index=index, name=name, **kwargs)
51it [21:41, 29.85s/it]

Saved 11659 rows.

Read file 51.


  s = pd.Series(data, index=index, name=name, **kwargs)
52it [22:07, 28.82s/it]

Saved 11828 rows.

Read file 52.


  s = pd.Series(data, index=index, name=name, **kwargs)
53it [22:27, 26.02s/it]

Saved 11760 rows.

Read file 53.


  s = pd.Series(data, index=index, name=name, **kwargs)
54it [22:48, 24.49s/it]

Saved 11778 rows.

Read file 54.


  s = pd.Series(data, index=index, name=name, **kwargs)
55it [23:21, 27.01s/it]

Saved 12081 rows.

Read file 55.


  s = pd.Series(data, index=index, name=name, **kwargs)
56it [23:38, 24.09s/it]

Saved 11876 rows.

Read file 56.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
57it [24:28, 31.78s/it]

Saved 18947 rows.

Read file 57.


  s = pd.Series(data, index=index, name=name, **kwargs)
58it [25:00, 31.87s/it]

Saved 12063 rows.

Read file 58.


  s = pd.Series(data, index=index, name=name, **kwargs)
59it [25:18, 27.76s/it]

Saved 11715 rows.

Read file 59.


  s = pd.Series(data, index=index, name=name, **kwargs)
60it [25:39, 25.60s/it]

Saved 11632 rows.

Read file 60.


  s = pd.Series(data, index=index, name=name, **kwargs)
61it [26:14, 28.64s/it]

Saved 11849 rows.

Read file 61.


  s = pd.Series(data, index=index, name=name, **kwargs)
62it [26:38, 27.31s/it]

Saved 11545 rows.

Read file 62.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
63it [27:00, 25.60s/it]

Saved 11845 rows.

Read file 63.


  s = pd.Series(data, index=index, name=name, **kwargs)
64it [27:21, 24.20s/it]

Saved 11860 rows.

Read file 64.


  s = pd.Series(data, index=index, name=name, **kwargs)
65it [27:47, 24.66s/it]

Saved 11974 rows.

Read file 65.


  s = pd.Series(data, index=index, name=name, **kwargs)
66it [28:03, 22.07s/it]

Saved 11845 rows.

Read file 66.


  s = pd.Series(data, index=index, name=name, **kwargs)
67it [28:20, 20.66s/it]

Saved 11868 rows.

Read file 67.


  s = pd.Series(data, index=index, name=name, **kwargs)
68it [29:10, 29.34s/it]

Saved 11826 rows.

Read file 68.


  s = pd.Series(data, index=index, name=name, **kwargs)
69it [29:28, 25.89s/it]

Saved 11723 rows.

Read file 69.


  s = pd.Series(data, index=index, name=name, **kwargs)
70it [29:45, 23.39s/it]

Saved 11734 rows.

Read file 70.


  s = pd.Series(data, index=index, name=name, **kwargs)
71it [30:24, 27.96s/it]

Saved 11746 rows.

Read file 71.


  s = pd.Series(data, index=index, name=name, **kwargs)
72it [30:43, 25.49s/it]

Saved 11830 rows.

Read file 72.


  s = pd.Series(data, index=index, name=name, **kwargs)
73it [31:04, 24.03s/it]

Saved 11903 rows.

Read file 73.


  s = pd.Series(data, index=index, name=name, **kwargs)
74it [31:20, 21.64s/it]

Saved 11671 rows.

Read file 74.


  s = pd.Series(data, index=index, name=name, **kwargs)
75it [31:37, 20.21s/it]

Saved 11681 rows.

Read file 75.


  s = pd.Series(data, index=index, name=name, **kwargs)
76it [31:53, 18.99s/it]

Saved 11794 rows.

Read file 76.


  s = pd.Series(data, index=index, name=name, **kwargs)
77it [32:10, 18.44s/it]

Saved 11947 rows.

Read file 77.


  s = pd.Series(data, index=index, name=name, **kwargs)
78it [32:48, 24.19s/it]

Saved 11864 rows.

Read file 78.


  s = pd.Series(data, index=index, name=name, **kwargs)
79it [33:04, 21.84s/it]

Saved 12248 rows.

Read file 79.


  s = pd.Series(data, index=index, name=name, **kwargs)
80it [33:19, 19.84s/it]

Saved 11799 rows.

Read file 80.


  s = pd.Series(data, index=index, name=name, **kwargs)
81it [33:47, 22.00s/it]

CBG length incorrect: CA:59150078
Saved 17810 rows.

Read file 81.


  s = pd.Series(data, index=index, name=name, **kwargs)
82it [34:03, 20.32s/it]

Saved 12078 rows.

Read file 82.


  s = pd.Series(data, index=index, name=name, **kwargs)
83it [34:19, 18.91s/it]

Saved 11929 rows.

Read file 83.


  s = pd.Series(data, index=index, name=name, **kwargs)
84it [34:34, 18.01s/it]

Saved 11636 rows.

Read file 84.


  s = pd.Series(data, index=index, name=name, **kwargs)
85it [35:12, 23.82s/it]

Saved 11874 rows.

Read file 85.


  s = pd.Series(data, index=index, name=name, **kwargs)
86it [35:30, 22.04s/it]

Saved 11518 rows.

Read file 86.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
87it [36:00, 24.38s/it]

Saved 11837 rows.

Read file 87.


  s = pd.Series(data, index=index, name=name, **kwargs)
88it [36:44, 30.25s/it]

Saved 11810 rows.

Read file 88.


  s = pd.Series(data, index=index, name=name, **kwargs)
89it [37:13, 29.90s/it]

Saved 11939 rows.

Read file 89.


  s = pd.Series(data, index=index, name=name, **kwargs)
90it [37:31, 26.32s/it]

Saved 11999 rows.

Read file 90.


  s = pd.Series(data, index=index, name=name, **kwargs)
91it [38:04, 28.32s/it]

Saved 11840 rows.

Read file 91.


  s = pd.Series(data, index=index, name=name, **kwargs)
92it [38:32, 28.37s/it]

Saved 11763 rows.

Read file 92.


  s = pd.Series(data, index=index, name=name, **kwargs)
93it [39:10, 31.22s/it]

Saved 11764 rows.

Read file 93.


  s = pd.Series(data, index=index, name=name, **kwargs)
94it [40:01, 37.11s/it]

Saved 11641 rows.

Read file 94.


  s = pd.Series(data, index=index, name=name, **kwargs)
95it [40:19, 31.32s/it]

Saved 11973 rows.

Read file 95.


  s = pd.Series(data, index=index, name=name, **kwargs)
96it [40:55, 25.58s/it]
0it [00:00, ?it/s]

Saved 11782 rows.
--------done: month 10--------

Read file 0.


  s = pd.Series(data, index=index, name=name, **kwargs)
1it [00:16, 16.83s/it]

Saved 12047 rows.

Read file 1.


  s = pd.Series(data, index=index, name=name, **kwargs)
2it [00:35, 18.00s/it]

Saved 11950 rows.

Read file 2.


  s = pd.Series(data, index=index, name=name, **kwargs)
3it [00:51, 17.14s/it]

Saved 11903 rows.

Read file 3.


  s = pd.Series(data, index=index, name=name, **kwargs)
4it [01:07, 16.70s/it]

Saved 12073 rows.

Read file 4.


  s = pd.Series(data, index=index, name=name, **kwargs)
5it [01:25, 17.15s/it]

Saved 11937 rows.

Read file 5.


  s = pd.Series(data, index=index, name=name, **kwargs)
6it [01:43, 17.39s/it]

Saved 11972 rows.

Read file 6.


  s = pd.Series(data, index=index, name=name, **kwargs)
7it [02:15, 22.20s/it]

Saved 12240 rows.

Read file 7.


  s = pd.Series(data, index=index, name=name, **kwargs)
8it [02:34, 21.00s/it]

Saved 12153 rows.

Read file 8.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
9it [03:12, 26.32s/it]

Saved 19517 rows.

Read file 9.


  s = pd.Series(data, index=index, name=name, **kwargs)
10it [03:40, 26.93s/it]

Saved 12074 rows.

Read file 10.


  s = pd.Series(data, index=index, name=name, **kwargs)
11it [03:56, 23.46s/it]

Saved 11934 rows.

Read file 11.


  s = pd.Series(data, index=index, name=name, **kwargs)
12it [04:26, 25.52s/it]

Saved 11854 rows.

Read file 12.


  s = pd.Series(data, index=index, name=name, **kwargs)
13it [04:43, 23.15s/it]

Saved 12065 rows.

Read file 13.


  s = pd.Series(data, index=index, name=name, **kwargs)
14it [04:59, 20.95s/it]

Saved 11787 rows.

Read file 14.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
15it [05:20, 20.78s/it]

Saved 12024 rows.

Read file 15.


  s = pd.Series(data, index=index, name=name, **kwargs)
16it [05:36, 19.39s/it]

Saved 12040 rows.

Read file 16.


  s = pd.Series(data, index=index, name=name, **kwargs)
17it [06:04, 22.12s/it]

Saved 12184 rows.

Read file 17.


  s = pd.Series(data, index=index, name=name, **kwargs)
18it [06:23, 21.05s/it]

Saved 12053 rows.

Read file 18.


  s = pd.Series(data, index=index, name=name, **kwargs)
19it [06:41, 20.15s/it]

Saved 12024 rows.

Read file 19.


  s = pd.Series(data, index=index, name=name, **kwargs)
20it [06:58, 19.29s/it]

Saved 12033 rows.

Read file 20.


  s = pd.Series(data, index=index, name=name, **kwargs)
21it [07:14, 18.12s/it]

Saved 11966 rows.

Read file 21.


  s = pd.Series(data, index=index, name=name, **kwargs)
22it [07:31, 18.03s/it]

Saved 11981 rows.

Read file 22.


  s = pd.Series(data, index=index, name=name, **kwargs)
23it [08:01, 21.41s/it]

Saved 11988 rows.

Read file 23.


  s = pd.Series(data, index=index, name=name, **kwargs)
24it [08:18, 20.06s/it]

Saved 12015 rows.

Read file 24.


  s = pd.Series(data, index=index, name=name, **kwargs)
25it [08:42, 21.24s/it]

Saved 11971 rows.

Read file 25.


  s = pd.Series(data, index=index, name=name, **kwargs)
26it [08:57, 19.41s/it]

Saved 12007 rows.

Read file 26.


  s = pd.Series(data, index=index, name=name, **kwargs)
27it [09:14, 18.88s/it]

Saved 11942 rows.

Read file 27.


  s = pd.Series(data, index=index, name=name, **kwargs)
28it [09:34, 19.06s/it]

Saved 11926 rows.

Read file 28.


  s = pd.Series(data, index=index, name=name, **kwargs)
29it [09:50, 18.10s/it]

Saved 12101 rows.

Read file 29.


  s = pd.Series(data, index=index, name=name, **kwargs)
30it [10:08, 18.24s/it]

Saved 12002 rows.

Read file 30.


  s = pd.Series(data, index=index, name=name, **kwargs)
31it [10:36, 21.16s/it]

Saved 12350 rows.

Read file 31.


  s = pd.Series(data, index=index, name=name, **kwargs)
32it [10:53, 19.70s/it]

Saved 12019 rows.

Read file 32.


  s = pd.Series(data, index=index, name=name, **kwargs)
33it [11:15, 20.46s/it]

CBG length incorrect: CA:59150078
Saved 17310 rows.

Read file 33.


  s = pd.Series(data, index=index, name=name, **kwargs)
34it [11:32, 19.40s/it]

Saved 12212 rows.

Read file 34.


  s = pd.Series(data, index=index, name=name, **kwargs)
35it [11:49, 18.79s/it]

Saved 12051 rows.

Read file 35.


  s = pd.Series(data, index=index, name=name, **kwargs)
36it [12:09, 19.00s/it]

Saved 11830 rows.

Read file 36.


  s = pd.Series(data, index=index, name=name, **kwargs)
37it [12:29, 19.55s/it]

Saved 12176 rows.

Read file 37.


  s = pd.Series(data, index=index, name=name, **kwargs)
38it [13:11, 26.06s/it]

Saved 11835 rows.

Read file 38.


  s = pd.Series(data, index=index, name=name, **kwargs)
39it [13:28, 23.34s/it]

Saved 11961 rows.

Read file 39.


  s = pd.Series(data, index=index, name=name, **kwargs)
40it [13:48, 22.45s/it]

Saved 11940 rows.

Read file 40.


  s = pd.Series(data, index=index, name=name, **kwargs)
41it [14:09, 21.99s/it]

Saved 12245 rows.

Read file 41.


  s = pd.Series(data, index=index, name=name, **kwargs)
42it [14:40, 24.61s/it]

Saved 12175 rows.

Read file 42.


  s = pd.Series(data, index=index, name=name, **kwargs)
43it [15:11, 26.68s/it]

Saved 11984 rows.

Read file 43.


  s = pd.Series(data, index=index, name=name, **kwargs)
44it [15:31, 24.71s/it]

Saved 11852 rows.

Read file 44.


  s = pd.Series(data, index=index, name=name, **kwargs)
45it [15:57, 25.15s/it]

Saved 11933 rows.

Read file 45.


  s = pd.Series(data, index=index, name=name, **kwargs)
46it [16:20, 24.43s/it]

Saved 11791 rows.

Read file 46.


  s = pd.Series(data, index=index, name=name, **kwargs)
47it [16:46, 24.76s/it]

Saved 12122 rows.

Read file 47.


  s = pd.Series(data, index=index, name=name, **kwargs)
48it [17:14, 25.94s/it]

Saved 11909 rows.

Read file 48.


  s = pd.Series(data, index=index, name=name, **kwargs)
49it [17:57, 31.00s/it]

Saved 12063 rows.

Read file 49.


  s = pd.Series(data, index=index, name=name, **kwargs)
50it [18:23, 29.33s/it]

Saved 11996 rows.

Read file 50.


  s = pd.Series(data, index=index, name=name, **kwargs)
51it [18:51, 29.03s/it]

Saved 11930 rows.

Read file 51.


  s = pd.Series(data, index=index, name=name, **kwargs)
52it [19:12, 26.47s/it]

Saved 12073 rows.

Read file 52.


  s = pd.Series(data, index=index, name=name, **kwargs)
53it [19:32, 24.70s/it]

Saved 11926 rows.

Read file 53.


  s = pd.Series(data, index=index, name=name, **kwargs)
54it [19:52, 23.18s/it]

Saved 11952 rows.

Read file 54.


  s = pd.Series(data, index=index, name=name, **kwargs)
55it [20:13, 22.58s/it]

Saved 12299 rows.

Read file 55.


  s = pd.Series(data, index=index, name=name, **kwargs)
56it [20:36, 22.58s/it]

Saved 12099 rows.

Read file 56.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
57it [21:13, 27.03s/it]

Saved 19565 rows.

Read file 57.


  s = pd.Series(data, index=index, name=name, **kwargs)
58it [21:30, 23.94s/it]

Saved 12101 rows.

Read file 58.


  s = pd.Series(data, index=index, name=name, **kwargs)
59it [21:49, 22.47s/it]

Saved 12013 rows.

Read file 59.


  s = pd.Series(data, index=index, name=name, **kwargs)
60it [22:08, 21.46s/it]

Saved 11839 rows.

Read file 60.


  s = pd.Series(data, index=index, name=name, **kwargs)
61it [22:35, 23.14s/it]

Saved 12073 rows.

Read file 61.


  s = pd.Series(data, index=index, name=name, **kwargs)
62it [22:53, 21.78s/it]

Saved 11755 rows.

Read file 62.


  s = pd.Series(data, index=index, name=name, **kwargs)
63it [23:11, 20.55s/it]

Saved 12093 rows.

Read file 63.


  s = pd.Series(data, index=index, name=name, **kwargs)
64it [23:28, 19.35s/it]

Saved 12052 rows.

Read file 64.


  s = pd.Series(data, index=index, name=name, **kwargs)
65it [23:43, 18.20s/it]

Saved 12203 rows.

Read file 65.


  s = pd.Series(data, index=index, name=name, **kwargs)
66it [24:00, 17.81s/it]

Saved 12081 rows.

Read file 66.


  s = pd.Series(data, index=index, name=name, **kwargs)
67it [24:28, 20.83s/it]

Saved 12046 rows.

Read file 67.


  s = pd.Series(data, index=index, name=name, **kwargs)
68it [24:44, 19.49s/it]

Saved 12048 rows.

Read file 68.


  s = pd.Series(data, index=index, name=name, **kwargs)
69it [25:36, 29.28s/it]

Saved 11962 rows.

Read file 69.


  s = pd.Series(data, index=index, name=name, **kwargs)
70it [25:52, 25.23s/it]

Saved 11916 rows.

Read file 70.


  s = pd.Series(data, index=index, name=name, **kwargs)
71it [26:13, 23.84s/it]

Saved 11971 rows.

Read file 71.


  s = pd.Series(data, index=index, name=name, **kwargs)
72it [26:29, 21.55s/it]

Saved 12092 rows.

Read file 72.


  s = pd.Series(data, index=index, name=name, **kwargs)
73it [26:44, 19.65s/it]

Saved 12064 rows.

Read file 73.


  s = pd.Series(data, index=index, name=name, **kwargs)
74it [27:00, 18.58s/it]

Saved 11908 rows.

Read file 74.


  s = pd.Series(data, index=index, name=name, **kwargs)
75it [27:17, 17.94s/it]

Saved 11895 rows.

Read file 75.


  s = pd.Series(data, index=index, name=name, **kwargs)
76it [27:33, 17.36s/it]

Saved 11998 rows.

Read file 76.


  s = pd.Series(data, index=index, name=name, **kwargs)
77it [27:48, 16.78s/it]

Saved 11898 rows.

Read file 77.


  s = pd.Series(data, index=index, name=name, **kwargs)
78it [28:03, 16.27s/it]

Saved 11923 rows.

Read file 78.


  s = pd.Series(data, index=index, name=name, **kwargs)
79it [28:19, 16.00s/it]

Saved 12296 rows.

Read file 79.


  s = pd.Series(data, index=index, name=name, **kwargs)
80it [28:34, 15.93s/it]

Saved 12086 rows.

Read file 80.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
81it [29:01, 19.14s/it]

Saved 19214 rows.

Read file 81.


  s = pd.Series(data, index=index, name=name, **kwargs)
82it [29:17, 18.05s/it]

Saved 12262 rows.

Read file 82.


  s = pd.Series(data, index=index, name=name, **kwargs)
83it [29:33, 17.67s/it]

Saved 11956 rows.

Read file 83.


  s = pd.Series(data, index=index, name=name, **kwargs)
84it [29:49, 17.03s/it]

Saved 11763 rows.

Read file 84.


  s = pd.Series(data, index=index, name=name, **kwargs)
85it [30:05, 16.66s/it]

Saved 12076 rows.

Read file 85.


  s = pd.Series(data, index=index, name=name, **kwargs)
86it [30:23, 17.28s/it]

Saved 11692 rows.

Read file 86.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
87it [30:51, 20.28s/it]

Saved 12024 rows.

Read file 87.


  s = pd.Series(data, index=index, name=name, **kwargs)
88it [31:08, 19.40s/it]

Saved 11987 rows.

Read file 88.


  s = pd.Series(data, index=index, name=name, **kwargs)
89it [31:26, 18.81s/it]

Saved 12115 rows.

Read file 89.


  s = pd.Series(data, index=index, name=name, **kwargs)
90it [31:42, 18.17s/it]

Saved 12050 rows.

Read file 90.


  s = pd.Series(data, index=index, name=name, **kwargs)
91it [31:58, 17.50s/it]

Saved 11967 rows.

Read file 91.


  s = pd.Series(data, index=index, name=name, **kwargs)
92it [32:15, 17.37s/it]

Saved 11989 rows.

Read file 92.


  s = pd.Series(data, index=index, name=name, **kwargs)
93it [32:31, 16.97s/it]

Saved 11975 rows.

Read file 93.


  s = pd.Series(data, index=index, name=name, **kwargs)
94it [33:01, 20.75s/it]

Saved 11952 rows.

Read file 94.


  s = pd.Series(data, index=index, name=name, **kwargs)
95it [33:16, 19.23s/it]

Saved 11948 rows.

Read file 95.


  s = pd.Series(data, index=index, name=name, **kwargs)
96it [33:34, 18.71s/it]

Saved 11985 rows.

Read file 96.


  s = pd.Series(data, index=index, name=name, **kwargs)
97it [33:52, 18.45s/it]

Saved 12027 rows.

Read file 97.


  s = pd.Series(data, index=index, name=name, **kwargs)
98it [34:08, 17.78s/it]

Saved 11914 rows.

Read file 98.


  s = pd.Series(data, index=index, name=name, **kwargs)
99it [35:00, 28.17s/it]

Saved 11889 rows.

Read file 99.


  s = pd.Series(data, index=index, name=name, **kwargs)
100it [35:20, 25.54s/it]

Saved 12011 rows.

Read file 100.


  s = pd.Series(data, index=index, name=name, **kwargs)
101it [35:37, 22.94s/it]

Saved 11894 rows.

Read file 101.


  s = pd.Series(data, index=index, name=name, **kwargs)
102it [35:53, 20.80s/it]

Saved 11907 rows.

Read file 102.


  s = pd.Series(data, index=index, name=name, **kwargs)
103it [36:33, 26.77s/it]

Saved 12270 rows.

Read file 103.


  s = pd.Series(data, index=index, name=name, **kwargs)
104it [36:49, 23.49s/it]

Saved 12085 rows.

Read file 104.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
105it [37:16, 24.42s/it]

Saved 19315 rows.

Read file 105.


  s = pd.Series(data, index=index, name=name, **kwargs)
106it [37:32, 21.99s/it]

Saved 12275 rows.

Read file 106.


  s = pd.Series(data, index=index, name=name, **kwargs)
107it [37:48, 20.28s/it]

Saved 11868 rows.

Read file 107.


  s = pd.Series(data, index=index, name=name, **kwargs)
108it [38:09, 20.32s/it]

Saved 11791 rows.

Read file 108.


  s = pd.Series(data, index=index, name=name, **kwargs)
109it [38:25, 19.12s/it]

Saved 12063 rows.

Read file 109.


  s = pd.Series(data, index=index, name=name, **kwargs)
110it [38:53, 21.86s/it]

Saved 11738 rows.

Read file 110.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
111it [39:10, 20.42s/it]

Saved 12058 rows.

Read file 111.


  s = pd.Series(data, index=index, name=name, **kwargs)
112it [39:26, 19.09s/it]

Saved 12016 rows.

Read file 112.


  s = pd.Series(data, index=index, name=name, **kwargs)
113it [39:42, 17.93s/it]

Saved 12147 rows.

Read file 113.


  s = pd.Series(data, index=index, name=name, **kwargs)
114it [39:59, 17.87s/it]

Saved 12016 rows.

Read file 114.


  s = pd.Series(data, index=index, name=name, **kwargs)
115it [40:16, 17.56s/it]

Saved 11941 rows.

Read file 115.


  s = pd.Series(data, index=index, name=name, **kwargs)
116it [40:32, 17.17s/it]

Saved 11957 rows.

Read file 116.


  s = pd.Series(data, index=index, name=name, **kwargs)
117it [40:48, 16.85s/it]

Saved 11948 rows.

Read file 117.


  s = pd.Series(data, index=index, name=name, **kwargs)
118it [41:05, 16.77s/it]

Saved 11926 rows.

Read file 118.


  s = pd.Series(data, index=index, name=name, **kwargs)
119it [41:20, 16.21s/it]

Saved 12011 rows.

Read file 119.


  s = pd.Series(data, index=index, name=name, **kwargs)
120it [41:39, 20.83s/it]
0it [00:00, ?it/s]

Saved 11994 rows.
--------done: month 11--------

Read file 0.


  s = pd.Series(data, index=index, name=name, **kwargs)
1it [00:15, 15.99s/it]

Saved 11582 rows.

Read file 1.


  s = pd.Series(data, index=index, name=name, **kwargs)
2it [00:32, 16.19s/it]

Saved 11383 rows.

Read file 2.


  s = pd.Series(data, index=index, name=name, **kwargs)
3it [00:48, 16.09s/it]

Saved 11507 rows.

Read file 3.


  s = pd.Series(data, index=index, name=name, **kwargs)
4it [01:04, 15.96s/it]

Saved 11506 rows.

Read file 4.


  s = pd.Series(data, index=index, name=name, **kwargs)
5it [01:20, 15.96s/it]

Saved 11491 rows.

Read file 5.


  s = pd.Series(data, index=index, name=name, **kwargs)
6it [01:59, 23.88s/it]

Saved 11411 rows.

Read file 6.


  s = pd.Series(data, index=index, name=name, **kwargs)
7it [02:16, 21.65s/it]

Saved 11805 rows.

Read file 7.


  s = pd.Series(data, index=index, name=name, **kwargs)
8it [02:32, 19.78s/it]

Saved 11583 rows.

Read file 8.
CBG length incorrect: CA:59150078


  s = pd.Series(data, index=index, name=name, **kwargs)
9it [03:01, 22.67s/it]

Saved 21798 rows.

Read file 9.


  s = pd.Series(data, index=index, name=name, **kwargs)
10it [03:17, 20.81s/it]

Saved 11535 rows.

Read file 10.


  s = pd.Series(data, index=index, name=name, **kwargs)
11it [03:35, 19.98s/it]

Saved 11456 rows.

Read file 11.


  s = pd.Series(data, index=index, name=name, **kwargs)
12it [03:53, 19.15s/it]

Saved 11582 rows.

Read file 12.


  s = pd.Series(data, index=index, name=name, **kwargs)
13it [04:08, 18.05s/it]

Saved 11321 rows.

Read file 13.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
14it [04:25, 17.57s/it]

Saved 11480 rows.

Read file 14.


  s = pd.Series(data, index=index, name=name, **kwargs)
15it [04:41, 17.18s/it]

Saved 11539 rows.

Read file 15.


  s = pd.Series(data, index=index, name=name, **kwargs)
16it [04:57, 16.96s/it]

Saved 11618 rows.

Read file 16.


  s = pd.Series(data, index=index, name=name, **kwargs)
17it [05:13, 16.66s/it]

Saved 11677 rows.

Read file 17.


  s = pd.Series(data, index=index, name=name, **kwargs)
18it [05:29, 16.42s/it]

Saved 11560 rows.

Read file 18.


  s = pd.Series(data, index=index, name=name, **kwargs)
19it [05:45, 16.30s/it]

Saved 11553 rows.

Read file 19.


  s = pd.Series(data, index=index, name=name, **kwargs)
20it [06:05, 17.47s/it]

Saved 11456 rows.

Read file 20.


  s = pd.Series(data, index=index, name=name, **kwargs)
21it [06:22, 17.10s/it]

Saved 11383 rows.

Read file 21.


  s = pd.Series(data, index=index, name=name, **kwargs)
22it [06:50, 20.55s/it]

Saved 11620 rows.

Read file 22.


  s = pd.Series(data, index=index, name=name, **kwargs)
23it [07:08, 19.76s/it]

Saved 11395 rows.

Read file 23.


  s = pd.Series(data, index=index, name=name, **kwargs)
24it [07:35, 21.95s/it]

Saved 11624 rows.

Read file 24.


  s = pd.Series(data, index=index, name=name, **kwargs)
25it [07:51, 20.05s/it]

Saved 11447 rows.

Read file 25.


  s = pd.Series(data, index=index, name=name, **kwargs)
26it [08:07, 18.98s/it]

Saved 11521 rows.

Read file 26.


  s = pd.Series(data, index=index, name=name, **kwargs)
27it [08:26, 18.84s/it]

Saved 11630 rows.

Read file 27.


  s = pd.Series(data, index=index, name=name, **kwargs)
28it [08:42, 18.03s/it]

Saved 11736 rows.

Read file 28.


  s = pd.Series(data, index=index, name=name, **kwargs)
29it [09:18, 23.51s/it]

Saved 11498 rows.

Read file 29.


  s = pd.Series(data, index=index, name=name, **kwargs)
30it [09:34, 21.33s/it]

Saved 11884 rows.

Read file 30.


  s = pd.Series(data, index=index, name=name, **kwargs)
31it [09:52, 20.09s/it]

Saved 11730 rows.

Read file 31.


  s = pd.Series(data, index=index, name=name, **kwargs)
32it [10:11, 19.83s/it]

Saved 13481 rows.

Read file 32.


  s = pd.Series(data, index=index, name=name, **kwargs)
33it [10:39, 22.30s/it]

CBG length incorrect: CA:59150078
Saved 11565 rows.

Read file 33.


  s = pd.Series(data, index=index, name=name, **kwargs)
34it [10:55, 20.52s/it]

Saved 11802 rows.

Read file 34.


  s = pd.Series(data, index=index, name=name, **kwargs)
35it [11:12, 19.50s/it]

Saved 11498 rows.

Read file 35.


  s = pd.Series(data, index=index, name=name, **kwargs)
36it [11:31, 19.28s/it]

Saved 11861 rows.

Read file 36.


  s = pd.Series(data, index=index, name=name, **kwargs)
37it [11:48, 18.51s/it]

Saved 11411 rows.

Read file 37.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
38it [12:16, 21.41s/it]

Saved 11684 rows.

Read file 38.


  s = pd.Series(data, index=index, name=name, **kwargs)
39it [12:44, 23.44s/it]

Saved 11769 rows.

Read file 39.


  s = pd.Series(data, index=index, name=name, **kwargs)
40it [13:01, 21.56s/it]

Saved 11941 rows.

Read file 40.


  s = pd.Series(data, index=index, name=name, **kwargs)
41it [13:47, 28.74s/it]

Saved 11822 rows.

Read file 41.


  s = pd.Series(data, index=index, name=name, **kwargs)
42it [14:03, 25.01s/it]

Saved 11699 rows.

Read file 42.


  s = pd.Series(data, index=index, name=name, **kwargs)
43it [14:19, 22.29s/it]

Saved 11547 rows.

Read file 43.


  s = pd.Series(data, index=index, name=name, **kwargs)
44it [14:35, 20.34s/it]

Saved 11569 rows.

Read file 44.


  s = pd.Series(data, index=index, name=name, **kwargs)
45it [15:02, 22.34s/it]

Saved 11489 rows.

Read file 45.


  s = pd.Series(data, index=index, name=name, **kwargs)
46it [15:19, 20.68s/it]

Saved 11697 rows.

Read file 46.


  s = pd.Series(data, index=index, name=name, **kwargs)
47it [16:22, 33.55s/it]

Saved 11590 rows.

Read file 47.


  s = pd.Series(data, index=index, name=name, **kwargs)
48it [16:38, 28.28s/it]

Saved 11659 rows.

Read file 48.


  s = pd.Series(data, index=index, name=name, **kwargs)
49it [17:07, 28.34s/it]

Saved 11550 rows.

Read file 49.


  s = pd.Series(data, index=index, name=name, **kwargs)
50it [17:24, 24.89s/it]

Saved 11530 rows.

Read file 50.


  s = pd.Series(data, index=index, name=name, **kwargs)
51it [17:41, 22.71s/it]

Saved 11629 rows.

Read file 51.


  s = pd.Series(data, index=index, name=name, **kwargs)
52it [17:58, 21.05s/it]

Saved 11563 rows.

Read file 52.


  s = pd.Series(data, index=index, name=name, **kwargs)
53it [18:34, 25.39s/it]

Saved 11594 rows.

Read file 53.


  s = pd.Series(data, index=index, name=name, **kwargs)
54it [18:50, 22.45s/it]

Saved 11824 rows.

Read file 54.


  s = pd.Series(data, index=index, name=name, **kwargs)
55it [19:07, 20.83s/it]

Saved 11711 rows.

Read file 55.


  s = pd.Series(data, index=index, name=name, **kwargs)
56it [19:33, 22.57s/it]

CBG length incorrect: CA:59150078
Saved 18690 rows.

Read file 56.


  s = pd.Series(data, index=index, name=name, **kwargs)
57it [19:49, 20.60s/it]

Saved 11862 rows.

Read file 57.


  s = pd.Series(data, index=index, name=name, **kwargs)
58it [20:05, 19.27s/it]

Saved 11632 rows.

Read file 58.


  s = pd.Series(data, index=index, name=name, **kwargs)
59it [20:33, 21.81s/it]

Saved 11486 rows.

Read file 59.


  s = pd.Series(data, index=index, name=name, **kwargs)
60it [20:50, 20.35s/it]

Saved 11633 rows.

Read file 60.


  s = pd.Series(data, index=index, name=name, **kwargs)
61it [21:06, 19.15s/it]

Saved 11366 rows.

Read file 61.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
62it [21:26, 19.12s/it]

Saved 11629 rows.

Read file 62.


  s = pd.Series(data, index=index, name=name, **kwargs)
63it [21:41, 18.11s/it]

Saved 11609 rows.

Read file 63.


  s = pd.Series(data, index=index, name=name, **kwargs)
64it [22:09, 21.13s/it]

Saved 11786 rows.

Read file 64.


  s = pd.Series(data, index=index, name=name, **kwargs)
65it [22:26, 19.86s/it]

Saved 11673 rows.

Read file 65.


  s = pd.Series(data, index=index, name=name, **kwargs)
66it [22:42, 18.62s/it]

Saved 11647 rows.

Read file 66.


  s = pd.Series(data, index=index, name=name, **kwargs)
67it [22:58, 17.69s/it]

Saved 11581 rows.

Read file 67.


  s = pd.Series(data, index=index, name=name, **kwargs)
68it [23:15, 17.53s/it]

Saved 11506 rows.

Read file 68.


  s = pd.Series(data, index=index, name=name, **kwargs)
69it [23:32, 17.31s/it]

Saved 11570 rows.

Read file 69.


  s = pd.Series(data, index=index, name=name, **kwargs)
70it [23:48, 17.00s/it]

Saved 11528 rows.

Read file 70.


  s = pd.Series(data, index=index, name=name, **kwargs)
71it [24:04, 16.86s/it]

Saved 11592 rows.

Read file 71.


  s = pd.Series(data, index=index, name=name, **kwargs)
72it [24:22, 17.21s/it]

Saved 11736 rows.

Read file 72.


  s = pd.Series(data, index=index, name=name, **kwargs)
73it [24:50, 20.48s/it]

Saved 11612 rows.

Read file 73.


  s = pd.Series(data, index=index, name=name, **kwargs)
74it [25:15, 21.56s/it]

Saved 11673 rows.

Read file 74.


  s = pd.Series(data, index=index, name=name, **kwargs)
75it [25:33, 20.78s/it]

Saved 11719 rows.

Read file 75.


  s = pd.Series(data, index=index, name=name, **kwargs)
76it [25:51, 19.69s/it]

Saved 11723 rows.

Read file 76.


  s = pd.Series(data, index=index, name=name, **kwargs)
77it [26:11, 19.80s/it]

Saved 11632 rows.

Read file 77.


  s = pd.Series(data, index=index, name=name, **kwargs)
78it [26:27, 18.90s/it]

Saved 11985 rows.

Read file 78.


  s = pd.Series(data, index=index, name=name, **kwargs)
79it [26:45, 18.46s/it]

Saved 11856 rows.

Read file 79.


  s = pd.Series(data, index=index, name=name, **kwargs)
80it [27:06, 19.35s/it]

CBG length incorrect: CA:59150078
Saved 14908 rows.

Read file 80.


  s = pd.Series(data, index=index, name=name, **kwargs)
81it [27:24, 18.71s/it]

Saved 11775 rows.

Read file 81.


  s = pd.Series(data, index=index, name=name, **kwargs)
82it [27:43, 18.83s/it]

Saved 11704 rows.

Read file 82.


  s = pd.Series(data, index=index, name=name, **kwargs)
83it [27:59, 18.19s/it]

Saved 11580 rows.

Read file 83.


  s = pd.Series(data, index=index, name=name, **kwargs)
84it [28:17, 17.99s/it]

Saved 11823 rows.

Read file 84.


  s = pd.Series(data, index=index, name=name, **kwargs)
85it [28:49, 22.12s/it]

Saved 11440 rows.

Read file 85.


  return GeometryArray(vectorized.points_from_xy(x, y, z), crs=crs)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cp['GEOID'] = points.apply(lambda p: get_tract_num(tract_data, p))
86it [29:21, 25.19s/it]

Saved 11649 rows.

Read file 86.


  s = pd.Series(data, index=index, name=name, **kwargs)
87it [29:39, 22.96s/it]

Saved 11744 rows.

Read file 87.


  s = pd.Series(data, index=index, name=name, **kwargs)
88it [29:56, 21.16s/it]

Saved 11901 rows.

Read file 88.


  s = pd.Series(data, index=index, name=name, **kwargs)
89it [30:16, 20.95s/it]

Saved 11792 rows.

Read file 89.


  s = pd.Series(data, index=index, name=name, **kwargs)
90it [30:34, 20.04s/it]

Saved 11849 rows.

Read file 90.


  s = pd.Series(data, index=index, name=name, **kwargs)
91it [30:55, 20.26s/it]

Saved 11677 rows.

Read file 91.


  s = pd.Series(data, index=index, name=name, **kwargs)
92it [31:13, 19.63s/it]

Saved 11665 rows.

Read file 92.


  s = pd.Series(data, index=index, name=name, **kwargs)
93it [31:30, 18.86s/it]

Saved 11619 rows.

Read file 93.


  s = pd.Series(data, index=index, name=name, **kwargs)
94it [31:48, 18.46s/it]

Saved 11728 rows.

Read file 94.


  s = pd.Series(data, index=index, name=name, **kwargs)
95it [32:10, 20.32s/it]

Saved 11704 rows.
--------done: month 12--------





In [33]:
from mobility_processor import *

ct = CensusTractMobility(tract_data_dir='../Tracts/nyc_metro_boundaries/nyc_metro_boundaries.shp')

  STATEFP COUNTYFP TRACTCE        GEOID  NAME           NAMELSAD  MTFCC  \
0      09      003  514500  09003514500  5145  Census Tract 5145  G5020   
1      09      003  514600  09003514600  5146  Census Tract 5146  G5020   
2      09      003  514700  09003514700  5147  Census Tract 5147  G5020   
3      09      003  514800  09003514800  5148  Census Tract 5148  G5020   
4      09      003  514900  09003514900  5149  Census Tract 5149  G5020   

  FUNCSTAT    ALAND  AWATER     INTPTLAT      INTPTLON state_abbr  \
0        S  2673486   48144  +41.7772247  -072.5398572         CT   
1        S  2144936       0  +41.7692952  -072.5444530         CT   
2        S  1383464       0  +41.7698536  -072.5227784         CT   
3        S  1377342       0  +41.7708559  -072.5106023         CT   
4        S  3546060   22405  +41.7907521  -072.4835364         CT   

                                            geometry  
0  POLYGON ((-72.56218 41.78077, -72.56122 41.780...  
1  POLYGON ((-72.55995 4

In [35]:
len(ct.node_idx_map.keys())

8231

In [38]:
pd.Series(ct.node_idx_map.keys()).nunique()

8231

In [39]:
graph_obj_path = '../safegraph/graph_checkpoints/nyc_metro/checkpoint_1.pkl'
with open(graph_obj_path, 'rb') as f:
    g = pickle.load(f) # CensusTractMobility object
idx_node_map = g.get_idx_node()

  g = pickle.load(f) # CensusTractMobility object


In [41]:
len(idx_node_map.keys())

8231

In [42]:
g.tract_data['GEOID']

0       09003514500
1       09003514600
2       09003514700
3       09003514800
4       09003514900
           ...     
8226    34029734003
8227    34013014000
8228    34013014400
8229    34013013600
8230    34013013700
Name: GEOID, Length: 8231, dtype: object