Neighborhood Patterns documentation:

https://docs.safegraph.com/docs/neighborhood-patterns


Social Distancing Metric:

https://docs.safegraph.com/docs/social-distancing-metrics



Using SafeGraph Weekly Patterns Dataset to serve as a proxy for discontinued social distancing metrics
https://colab.research.google.com/drive/1ETZo3KBhcwUikLRos5YmMZkf4aPweG44#scrollTo=11r70CTeAdXi

The contribution of wildfire to PM2.5 trends in the USA
https://www.nature.com/articles/s41586-023-06522-6

Wildfire smoke impacts respiratory health more than fine particles from other sources: observational evidence from Southern California
https://www.nature.com/articles/s41467-021-21708-0

In [10]:
# ! pip install CensusData

In [1]:
import pandas as pd
import os
from tqdm import tqdm
tqdm.pandas()
import geopandas as gpd
import glob
pd.set_option('display.max_columns', None)
import ast
import numpy as np
import json

# Helper functions

In [2]:
def get_all_files(root_dir, contains=[''], extions=['']):
    found_files = []
    for rt_dir, dirs, files in os.walk(root_dir):
        for ext in extions:
            ext = ext.lower()
            ext_len = len(ext)
            for file in files:
                file_ext = file[-(ext_len):]
                # print(file)
                file_ext = file_ext.lower()
                if file_ext == ext:
                    file_name = os.path.join(rt_dir, file)
                    found_files.append(file_name)
                    # continue                    
                
        for con in contains:
            con = con.lower()
            con_len = len(con)
            for file in files:
                if con in os.path.basename(file):
                    file_name = os.path.join(rt_dir, file)
                    found_files.append(file_name)
    return found_files

# Get files

In [23]:
data_root_dir = r'F:\SafeGraph\Advan_2023_API'

all_files = get_all_files(data_root_dir)
print(f"Found files: {len(all_files)}")
print("The top 5 and bottom 5 files:")
all_files[:5] + ['...'] + all_files[-5:]

Found files: 34
The top 5 and bottom 5 files:


['F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_0.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_1.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_10.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_2.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_3.csv.gz',
 '...',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\08\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_43_5_5.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\08\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_43_5_6.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\08\\01\\data_01af3c05-0404-bf99-0

In [4]:
target_years = ['2023']
target_months = ['06'] # ['08', '06', '07']
target_dataset = ['Neighborhood_Patterns']
# target_names = ['neighborhood_patterns_']
target_files = []

for file in all_files[:]:
    directories = file.replace(data_root_dir, '').split(os.sep)[1:]
    # print(directories)
    if len(directories) < 5:
        continue
    year = directories[1]
    month = directories[2]
    dataset = directories[0]
    basename = directories[-1]
    if year in target_years:
        if month in target_months:            
            if dataset in target_dataset:
                # for target_name in target_names:
                    # print("Year, Month, dataset, target_name:", year, month, dataset, target_name)
                    # if target_name in basename:
                target_files.append(file)

print(f"Found target files: {len(target_files)}")
print("The top 5 and bottom 5 files:")
target_files[:5] + ['...'] + target_files[-5:]

Found target files: 11
The top 5 and bottom 5 files:


['F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_0.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_1.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_10.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_2.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_3.csv.gz',
 '...',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_5.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0043-0b8700eda046_53_1_6.csv.gz',
 'F:\\SafeGraph\\Advan_2023_API\\Neighborhood_Patterns\\2023\\06\\01\\data_01af3c05-0404-bf99-0

In [5]:
# df = pd.read_csv(target_files[0])
df = pd.concat([pd.read_csv(f) for f in target_files[:]])
df

Unnamed: 0,AREA,AREA_TYPE,ORIGIN_AREA_TYPE,DATE_RANGE_START,DATE_RANGE_END,DAY_COUNTS,RAW_STOP_COUNTS,RAW_DEVICE_COUNTS,STOPS_BY_DAY,STOPS_BY_EACH_HOUR,DEVICE_HOME_AREAS,WEEKDAY_DEVICE_HOME_AREAS,WEEKEND_DEVICE_HOME_AREAS,BREAKFAST_DEVICE_HOME_AREAS,LUNCH_DEVICE_HOME_AREAS,DINNER_DEVICE_HOME_AREAS,NIGHTLIFE_DEVICE_HOME_AREAS,WORK_HOURS_DEVICE_HOME_AREAS,WORK_BEHAVIOR_DEVICE_HOME_AREAS,DEVICE_DAYTIME_AREAS,DISTANCE_FROM_HOME,DISTANCE_FROM_PRIMARY_DAYTIME_LOCATION,MEDIAN_DWELL,TOP_SAME_DAY_BRAND,TOP_SAME_MONTH_BRAND,POPULARITY_BY_EACH_HOUR,POPULARITY_BY_HOUR_MONDAY,POPULARITY_BY_HOUR_TUESDAY,POPULARITY_BY_HOUR_WEDNESDAY,POPULARITY_BY_HOUR_THURSDAY,POPULARITY_BY_HOUR_FRIDAY,POPULARITY_BY_HOUR_SATURDAY,POPULARITY_BY_HOUR_SUNDAY,DEVICE_TYPE,ISO_COUNTRY_CODE,REGION,Y,M
0,470370112006,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",6558,4181,"[208,162,223,224,92,215,261,174,260,218,326,15...","[0,0,0,0,0,0,0,0,22,22,39,6,14,26,28,0,7,10,5,...","{""470370112006"":148,""470370153004"":47,""4703701...","{""470370112006"":103,""470370153004"":34,""4703701...","{""470370112006"":15,""470370110022"":4,""470370116...","{""470370112006"":7,""470370110022"":4,""4703701160...","{""470370112006"":15,""470370153004"":4,""470370110...","{""470370112006"":31,""470370153004"":8,""470370110...","{""470370153004"":4,""470370110022"":4,""4703701950...","{""470370112006"":20,""470370153004"":10,""47037011...","{""470370193001"":4,""470370119002"":4,""4703701562...","{""470370112006"":95,""470370195004"":53,""47037019...",3940.0,6356.0,32.066667,"{""Mall"":15,""Dollar Tree"":7,""Kroger"":7,""Walmart...","{""Mall"":64,""Walmart"":40,""Kroger"":39,""Gas Stati...","[9,17,18,21,20,7,7,10,33,44,46,44,50,69,45,9,1...","[137,143,151,147,146,151,142,180,135,189,172,2...","[126,146,150,122,122,151,157,176,245,191,164,2...","[127,141,147,146,152,151,124,175,165,191,211,2...","[126,141,133,139,162,153,142,149,182,242,321,3...","[159,131,131,139,133,148,129,166,161,119,132,1...","[70,83,89,89,77,91,98,116,118,155,93,74,127,17...","[131,127,148,157,156,173,193,214,190,240,300,2...","{""android"":1630,""ios"":1323}",US,TN,2023,6
1,310199694003,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",42090,8019,"[1842,1829,1559,1210,1200,1150,1338,1712,1755,...","[0,10,17,23,51,16,89,56,90,133,116,174,183,106...","{""310199694003"":560,""310199692031"":242,""310199...","{""310199694003"":469,""310199692031"":195,""310199...","{""310199694003"":128,""310199692031"":31,""3101996...","{""310199694003"":104,""310199692031"":27,""3101996...","{""310199694003"":141,""310199692031"":93,""3101996...","{""310199694003"":143,""310199692031"":51,""3101996...","{""310199694003"":6,""310199692031"":4,""3101996920...","{""310199694003"":247,""310199692031"":145,""310199...","{""310199694003"":5,""310199690001"":8,""3101996920...","{""310199694003"":665,""310199692031"":248,""310199...",1988.0,1287.0,10.483333,"{""Hospitals"":22,""Walmart"":21,""Mall"":20,""Gas St...","{""Walmart"":54,""Mall"":53,""Gas Stations"":41,""Hos...","[110,114,124,133,164,167,225,236,230,239,263,3...","[324,345,348,362,356,478,574,739,683,817,824,8...","[334,378,362,398,425,483,582,691,833,869,881,9...","[449,470,423,465,494,593,664,783,797,775,715,8...","[423,477,462,474,503,589,837,964,1068,1007,112...","[485,512,505,516,619,712,896,1085,1245,1110,12...","[391,430,384,427,416,512,487,515,565,624,613,6...","[443,433,415,465,485,512,511,516,594,635,685,6...","{""android"":2740,""ios"":2349}",US,NE,2023,6
2,550939606003,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",33042,4781,"[1594,1195,789,1047,930,927,726,928,1295,1130,...","[0,0,5,42,74,42,23,59,82,145,70,72,81,71,64,11...","{""550939606003"":305,""550939606001"":85,""2704908...","{""550939606003"":197,""550939606001"":54,""2704908...","{""550939606003"":130,""550939606001"":15,""2704908...","{""550939606003"":46,""550939606001"":5,""270490801...","{""550939606003"":70,""550939606001"":22,""27049080...","{""550939606003"":97,""550939606001"":20,""27049080...","{""550939606003"":7,""270490801025"":4,""2704908020...","{""550939606003"":92,""550939606001"":30,""27049080...","{""550939606003"":4,""550939606001"":6,""2704908020...","{""550939606003"":212,""550939607001"":66,""5509396...",5057.0,8137.0,11.366667,"{""Gas Stations"":9,""Kwik Trip"":9,""Walmart"":9,""M...","{""Walmart"":39,""Mall"":37,""Gas Stations"":33,""Kwi...","[83,88,94,135,163,155,129,119,130,181,136,179,...","[452,502,504,557,525,455,475,474,458,482,496,5...","[505,534,510,524,547,473,529,606,553,590,498,5...","[392,449,441,466,485,424,445,419,498,508,484,5...","[530,575,582,639,705,683,702,619,643,743,669,7...","[700,727,755,715,727,761,788,815,846,908,777,7...","[442,446,510,578,630,663,750,756,728,670,638,7...","[550,597,612,640,647,700,794,885,870,800,730,6...","{""android"":2400,""ios"":1295}",US,WI,2023,6
3,450770112042,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",11249,3083,"[280,354,256,461,475,677,451,321,401,479,187,2...","[42,0,0,0,0,0,10,28,16,0,17,14,8,36,11,14,0,51...","{""450770112042"":113,""450730306021"":65,""4507701...","{""450770112042"":93,""450730306021"":53,""45077011...","{""450770112042"":33,""450730306021"":4,""450770112...","{""450770112042"":15,""450730306021"":4,""450770112...","{""450770112042"":22,""450730306021"":9,""450770112...","{""450770112042"":32,""450730306021"":13,""45077011...","{""450770112042"":4,""450730306021"":6,""4507701120...","{""450770112042"":34,""450730306021"":21,""45077011...","{""450770112042"":5,""450770112041"":4,""4507701120...","{""450770112042"":111,""450770111011"":77,""4507701...",6620.0,5274.0,22.566667,"{""Mall"":21,""Walmart"":14,""Universities and Coll...","{""Mall"":67,""Walmart"":56,""Gas Stations"":32,""Ing...","[36,25,27,28,22,28,39,72,22,12,14,16,10,55,37,...","[248,255,255,242,270,294,316,320,239,260,270,2...","[213,269,303,316,330,333,362,431,335,342,341,2...","[226,249,255,269,273,310,361,448,450,429,408,3...","[259,231,243,213,242,286,384,424,462,351,335,3...","[203,230,228,248,243,276,300,455,430,354,333,3...","[184,259,247,247,259,253,294,320,354,362,394,4...","[181,212,240,241,249,245,240,254,229,233,272,2...","{""android"":886,""ios"":997}",US,SC,2023,6
4,60375545222,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",14560,6277,"[313,430,408,450,351,886,451,358,432,808,613,2...","[0,0,0,0,0,14,0,0,0,24,0,36,22,22,0,17,37,0,96...","{""060375545222"":28,""060375545122"":12,""32003006...","{""060375545222"":16,""060375545122"":6,""320030067...","{""060375545222"":4,""060375545212"":9,""0603755452...","{""060375545222"":4,""060375545212"":6,""0603755480...","{""060375545222"":5,""060375700012"":4,""0605911011...","{""060375545222"":4,""060375545122"":12,""320030067...","{""060375545212"":4,""060375545223"":4,""0603755360...","{""060375545222"":5,""060375545122"":4,""0603757000...","{""060375545212"":4,""060375541052"":4,""0603755490...","{""060375545222"":29,""060375545223"":14,""06037554...",9763.0,8771.0,25.483333,"{""Mall"":25,""Macerich"":18,""Starbucks"":10,""Amazo...","{""Mall"":86,""Macerich"":45,""Starbucks"":43,""Walma...","[0,0,0,0,0,20,0,0,0,23,23,59,81,121,97,77,76,5...","[102,101,119,100,93,103,102,116,77,213,396,353...","[123,97,121,117,135,160,121,96,79,258,339,358,...","[74,77,82,73,77,76,122,60,135,139,133,259,409,...","[76,73,78,77,81,102,76,102,132,199,254,213,272...","[60,60,62,57,54,95,117,113,154,220,254,549,533...","[94,116,99,80,102,118,137,123,78,192,295,352,3...","[62,54,60,83,81,94,93,135,133,212,376,388,333,...","{""android"":3534,""ios"":2013}",US,CA,2023,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20952,60375433211,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",76304,13849,"[2619,2600,2303,1380,3382,2773,2634,2639,2966,...","[0,39,38,24,0,43,60,329,241,138,211,172,158,21...","{""060375433211"":81,""060375433051"":21,""06037543...","{""060375433211"":59,""060375433051"":15,""06037543...","{""060375433211"":10,""060375433212"":4,""060375433...","{""060375433211"":12,""060375433051"":4,""060375433...","{""060375433211"":17,""060375433051"":4,""060375433...","{""060375433211"":16,""060375433212"":4,""060375430...","{""060375433211"":4,""060375433051"":4,""0603754332...","{""060375433211"":31,""060375433051"":10,""06037543...","{""060375433211"":4,""060379800281"":4,""0603760300...","{""060375433211"":142,""060375433051"":34,""0603754...",9998.0,5854.0,55.333333,"{""Universities and Colleges"":24,""Mall"":22,""AEG...","{""Mall"":76,""Walmart"":40,""Universities and Coll...","[162,197,235,240,196,214,221,330,447,467,495,3...","[533,570,604,593,611,805,887,966,1222,1295,149...","[748,703,532,490,490,567,827,1057,1414,1551,16...","[629,664,607,594,568,787,946,1237,1608,1589,16...","[925,988,919,868,725,903,1004,1261,1629,1652,1...","[803,863,789,726,825,1046,1195,1414,1707,1805,...","[570,708,649,667,731,769,822,850,863,900,843,8...","[605,612,571,572,547,571,570,604,730,851,848,7...","{""android"":9581,""ios"":3204}",US,CA,2023,6
20953,390375701021,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",12605,1413,"[241,129,294,223,215,303,204,457,451,513,344,3...","[0,0,0,0,0,0,0,0,15,26,9,0,7,13,39,6,14,0,23,1...","{""390375701021"":183,""390375701023"":39,""3903754...","{""390375701021"":121,""390375701023"":23,""3903754...","{""390375701021"":73,""390375701023"":4,""390375401...","{""390375701021"":32,""390375701023"":4,""390375401...","{""390375701021"":32,""390375701023"":5,""390375401...","{""390375701021"":65,""390375701023"":6,""390375401...","{""390375701021"":9,""390375701023"":18,""390375701...","{""390375701021"":64,""390375701023"":10,""39037540...","{""390375701021"":13,""390375701023"":7,""390375701...","{""390375701021"":133,""390375701023"":30,""3903757...",1672.0,1672.0,20.883333,"{""Golf Courses"":32,""Mall"":16,""Walmart"":11,""Kro...","{""Mall"":41,""Golf Courses"":40,""Walmart"":38,""Kro...","[91,95,86,90,81,77,70,61,56,69,55,43,45,57,65,...","[276,272,250,240,174,206,259,291,280,321,284,2...","[283,270,268,261,242,265,287,296,321,332,303,3...","[330,344,343,340,297,309,268,278,236,247,246,2...","[425,418,421,418,353,382,383,357,396,420,413,3...","[364,372,369,372,329,327,378,315,318,330,347,3...","[277,291,284,280,283,303,345,329,321,315,292,3...","[307,353,322,335,300,316,329,349,368,396,428,4...","{""android"":617,""ios"":378}",US,OH,2023,6
20954,201259509002,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",26633,4948,"[958,1126,1618,804,807,995,980,1092,1460,705,5...","[5,0,0,0,8,8,39,50,85,55,36,28,59,132,41,82,61...","{""201259509002"":151,""201259508001"":93,""2009995...","{""201259509002"":105,""201259508001"":58,""2009995...","{""201259509002"":61,""201259508001"":26,""20099950...","{""201259509002"":23,""201259508001"":9,""200999506...","{""201259509002"":46,""201259508001"":27,""20099950...","{""201259509002"":45,""201259508001"":16,""20099950...","{""201259509002"":4,""201259508001"":14,""401051721...","{""201259509002"":51,""201259508001"":37,""20099950...","{""200999506001"":4,""201259510002"":4,""2012595070...","{""201259509002"":170,""201259510003"":106,""201259...",7476.0,5267.0,20.033333,"{""Walmart"":52,""Gas Stations"":11,""CVR Partners""...","{""Walmart"":75,""Mall"":46,""Gas Stations"":41,""Hos...","[28,39,47,35,54,54,67,87,107,119,87,86,108,204...","[190,192,196,192,214,204,236,338,267,344,336,3...","[154,161,156,178,197,212,263,331,386,476,484,4...","[191,188,177,216,286,254,250,343,380,460,537,5...","[284,315,327,314,354,332,402,495,505,675,639,6...","[306,325,334,336,339,337,353,410,579,616,714,6...","[254,214,249,265,303,302,342,350,395,430,471,5...","[176,203,200,228,223,252,260,311,345,384,375,3...","{""android"":2675,""ios"":1362}",US,KS,2023,6
20955,550790190003,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",4628,1532,"[142,92,79,77,133,149,173,175,280,127,78,132,1...","[15,0,0,0,0,0,0,0,0,10,0,14,0,0,8,8,17,0,25,31...","{""550790190003"":26,""550790190005"":13,""55079019...","{""550790190003"":17,""550790190005"":9,""550790190...","{""550790190003"":6,""550790190005"":4,""5507901900...","{""550790190003"":4,""550790190005"":4,""5507901900...","{""550790190003"":4,""550790190005"":4,""5507901900...","{""550790190003"":4,""550790190005"":4,""5507901900...","{""550790190001"":4,""550790191002"":4,""5507901910...","{""550790190003"":5,""550790190005"":6,""5507901900...","{""550790190005"":4,""550790190004"":7,""5507918740...","{""550790190003"":14,""550790190001"":8,""550790190...",2130.0,2131.0,40.950000,"{""Mall"":24,""Hospitals"":10,""Pick 'n Save"":8,""Wa...","{""Mall"":56,""Walmart"":50,""Pick 'n Save"":36,""Gas...","[39,42,38,39,41,41,41,42,38,61,41,37,25,24,34,...","[115,133,134,130,147,142,155,162,181,165,158,1...","[68,117,147,127,134,116,130,160,161,165,182,15...","[107,134,157,140,127,128,135,120,166,196,166,1...","[188,201,156,147,126,133,157,162,147,199,195,1...","[174,199,179,142,143,165,188,200,206,196,187,1...","[83,100,114,103,101,103,93,105,145,126,133,188...","[109,140,171,149,130,136,147,146,142,87,65,92,...","{""android"":820,""ios"":369}",US,WI,2023,6


In [16]:
df.columns

Index(['AREA', 'AREA_TYPE', 'ORIGIN_AREA_TYPE', 'DATE_RANGE_START',
       'DATE_RANGE_END', 'DAY_COUNTS', 'RAW_STOP_COUNTS', 'RAW_DEVICE_COUNTS',
       'STOPS_BY_DAY', 'STOPS_BY_EACH_HOUR', 'DEVICE_HOME_AREAS',
       'WEEKDAY_DEVICE_HOME_AREAS', 'WEEKEND_DEVICE_HOME_AREAS',
       'BREAKFAST_DEVICE_HOME_AREAS', 'LUNCH_DEVICE_HOME_AREAS',
       'DINNER_DEVICE_HOME_AREAS', 'NIGHTLIFE_DEVICE_HOME_AREAS',
       'WORK_HOURS_DEVICE_HOME_AREAS', 'WORK_BEHAVIOR_DEVICE_HOME_AREAS',
       'DEVICE_DAYTIME_AREAS', 'DISTANCE_FROM_HOME',
       'DISTANCE_FROM_PRIMARY_DAYTIME_LOCATION', 'MEDIAN_DWELL',
       'TOP_SAME_DAY_BRAND', 'TOP_SAME_MONTH_BRAND', 'POPULARITY_BY_EACH_HOUR',
       'POPULARITY_BY_HOUR_MONDAY', 'POPULARITY_BY_HOUR_TUESDAY',
       'POPULARITY_BY_HOUR_WEDNESDAY', 'POPULARITY_BY_HOUR_THURSDAY',
       'POPULARITY_BY_HOUR_FRIDAY', 'POPULARITY_BY_HOUR_SATURDAY',
       'POPULARITY_BY_HOUR_SUNDAY', 'DEVICE_TYPE', 'ISO_COUNTRY_CODE',
       'REGION', 'Y', 'M'],
      dtype='obje

In [17]:
df['AREA'] = df['AREA'].astype(str).str.zfill(12)
df['M'] = df['M'].astype(str).str.zfill(2)

In [18]:
CBG_count = len(df['AREA'].unique())
CBG_count

220684

In [19]:
len(str(sorted(df['AREA'].to_list())[0]))

12

## Get the CBG list

In [20]:
# get the CBG list
CBG_file_name = r"D:\OneDrive_PSU\OneDrive - The Pennsylvania State University\Research_doc\Wild_fire\CBG_list.csv"
# df[['area']].sort_values('area').to_csv(CBG_file_name, index=False)


In [21]:
np.ones((220684, 720)).shape

(220684, 720)

## Create a CBG diction

In [22]:
CBG_dict_file = r"D:\OneDrive_PSU\OneDrive - The Pennsylvania State University\Research_doc\Wild_fire\CBG_dict.json"

In [24]:
CBG_df = pd.read_csv(CBG_file_name, dtype={'area':str})
CBG_dict = {row['area']:idx for idx, row in CBG_df.iterrows()}
# json.dump(CBG_dict, open(CBG_dict_file, 'w'))

In [25]:
# CBG_dict
CBG_df

Unnamed: 0,area
0,010010201001
1,010010201002
2,010010202001
3,010010202002
4,010010203001
...,...
220679,780309611001
220680,780309611002
220681,780309612001
220682,780309612002


In [27]:
def CBG_list_to_Sqlite(df, sqlite_name):
     
    conn = sqlite3.connect(sqlite_name)
    curs = conn.cursor()
    
    columns = ','.join(df.columns)
    columns.replace('placekey,parent_placekey', 'placekey PRIMARY KEY,parent_placekey')
    
    curs.execute('create table if not exists POI ' +
                f"({','.join(df.columns)})")
    df.to_sql('POI', conn, if_exists='replace', index=False)
    
    sql = f'CREATE INDEX placekey_idx ON POI(placekey);'    
    curs.execute(sql)
    
    conn.close

In [14]:
len(ast.literal_eval(df.iloc[0]['stops_by_each_hour']))#[0][0][0]

720

In [15]:
df.columns

Index(['area', 'area_type', 'origin_area_type', 'date_range_start',
       'date_range_end', 'day_counts', 'raw_stop_counts', 'raw_device_counts',
       'stops_by_day', 'stops_by_each_hour', 'device_home_areas',
       'weekday_device_home_areas', 'weekend_device_home_areas',
       'breakfast_device_home_areas', 'lunch_device_home_areas',
       'afternoon_tea_device_home_areas', 'dinner_device_home_areas',
       'nightlife_device_home_areas', 'work_hours_device_home_areas',
       'work_behavior_device_home_areas', 'device_daytime_areas',
       'distance_from_home', 'distance_from_primary_daytime_location',
       'median_dwell', 'top_same_day_brand', 'top_same_month_brand',
       'popularity_by_each_hour', 'popularity_by_hour_monday',
       'popularity_by_hour_tuesday', 'popularity_by_hour_wednesday',
       'popularity_by_hour_thursday', 'popularity_by_hour_friday',
       'popularity_by_hour_saturday', 'popularity_by_hour_sunday',
       'device_type', 'iso_country_code', 're

In [26]:
df.iloc[0]['stops_by_each_hour']

'[0,0,0,0,0,16,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,0,0,20,0,13,0,0,0,0,15,76,27,40,93,12,0,0,0,14,7,6,0,0,0,0,27,12,7,6,0,37,0,24,0,0,0,0,0,0,0,28,15,0,0,14,0,0,0,0,0,18,21,15,53,0,0,0,0,9,8,7,39,34,38,0,0,16,16,7,7,0,16,42,25,9,9,0,11,20,26,20,35,34,46,41,20,71,13,19,23,14,0,9,10,0,13,0,0,13,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,26,0,10,23,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,15,16,13,0,0,0,24,0,0,0,9,15,46,0,0,0,0,0,0,23,40,24,24,19,10,29,0,0,0,22,43,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,32,0,0,0,0,0,0,0,0,0,0,14,8,63,48,0,38,52,0,0,0,0,0,0,0,0,0,0,0,16,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,61,15,0,0,17,41,28,37,57,22,0,22,0,21,60,33,0,0,0,9,30,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,11,0,0,0,0,0,0,6,0,17,0,38,29,22,27,0,16,13,29,0,7,10,0,34,80,79,36,25,26,0,15,0,16,9,0,0,7,0,0,0,0,0,6,14,0,

In [27]:
df.iloc[0]['popularity_by_each_hour']

'[0,0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,16,11,16,0,7,0,0,0,0,13,6,12,7,14,15,0,0,0,16,11,12,6,7,11,0,6,9,22,19,0,15,11,12,0,0,0,0,0,0,0,9,16,0,0,7,0,0,0,0,0,23,18,11,33,11,0,0,0,6,7,15,14,12,11,12,0,6,24,10,16,0,12,22,12,11,10,0,15,11,20,22,9,11,25,26,27,22,20,23,13,16,0,6,13,0,10,0,0,6,0,0,0,0,7,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,8,8,12,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,14,7,22,0,0,0,16,13,7,0,14,11,12,12,0,0,0,0,0,7,10,6,13,13,16,16,0,0,0,16,17,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,10,7,6,11,7,16,12,12,0,0,0,0,0,0,0,0,0,0,12,0,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,17,6,0,0,9,6,12,6,20,17,0,10,0,14,13,9,0,0,0,10,11,9,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,22,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,10,7,0,0,0,0,0,15,0,16,10,16,26,27,8,0,16,10,25,15,11,13,8,23,39,15,25,17,26,23,17,22,15,7,0,0,16,0,0,0,0,0,

In [28]:
len(ast.literal_eval(df.iloc[0]['popularity_by_each_hour']))

720

In [29]:
ast.literal_eval(df.iloc[0]['device_home_areas'])
ast.literal_eval(df.iloc[0]['stops_by_day'])
np.array(ast.literal_eval(df.iloc[0]['stops_by_day'])).sum()

4356

In [30]:
np.array(ast.literal_eval(df.iloc[0]['stops_by_each_hour'])).sum()

4367

In [31]:
# 

In [32]:
stops = ast.literal_eval(df.iloc[0]['stops_by_each_hour'])
len(stops)

720

# Create edge list of a hourly matrix

## Load CBGs and their index 

In [28]:
import random
import sqlite3

CBG_file_name = r"D:\OneDrive_PSU\OneDrive - The Pennsylvania State University\Research_doc\Wild_fire\CBG_list.csv"
# CBG_dict_file = r"D:\OneDrive_PSU\OneDrive - The Pennsylvania State University\Research_doc\Wild_fire\CBG_dict.json"
saved_dir = r'D:\SafeGraph\Neighborhood_Patterns_reorganized'

In [29]:
CBG_df = pd.read_csv(CBG_file_name, dtype={'area': str})
CBG_df['CBG_index'] = CBG_df.index
# Pandas dataframe to dictionary. Column "area" as the key, column "CBG_index" as the value.
CBG_dict = CBG_df.set_index('area')['CBG_index'].to_dict()
print("Found CBG count:", len(CBG_dict.keys()))
CBG_df

Found CBG count: 220684


Unnamed: 0,area,CBG_index
0,010010201001,0
1,010010201002,1
2,010010202001,2
3,010010202002,3
4,010010203001,4
...,...,...
220679,780309611001,220679
220680,780309611002,220680
220681,780309612001,220681
220682,780309612002,220682


In [31]:
n = 5 # for n random key-value pairs
random_keys = random.sample(list(CBG_dict.keys()), n)
random_items = {key: CBG_dict[key] for key in random_keys}

print(random_items)

{'421298029003': 173920, '330151071002': 119773, '551332033061': 216924, '320030055024': 118260, '360010019012': 127727}


In [33]:
np_df = df
np_df['M'] = np_df['M'].astype(str).str.zfill(2)
year = np_df.iloc[0]['Y']
month = np_df.iloc[0]['M']
print(f"Year, month: {year}-{month}")

np_df

Year, month: 2023-06


Unnamed: 0,AREA,AREA_TYPE,ORIGIN_AREA_TYPE,DATE_RANGE_START,DATE_RANGE_END,DAY_COUNTS,RAW_STOP_COUNTS,RAW_DEVICE_COUNTS,STOPS_BY_DAY,STOPS_BY_EACH_HOUR,DEVICE_HOME_AREAS,WEEKDAY_DEVICE_HOME_AREAS,WEEKEND_DEVICE_HOME_AREAS,BREAKFAST_DEVICE_HOME_AREAS,LUNCH_DEVICE_HOME_AREAS,DINNER_DEVICE_HOME_AREAS,NIGHTLIFE_DEVICE_HOME_AREAS,WORK_HOURS_DEVICE_HOME_AREAS,WORK_BEHAVIOR_DEVICE_HOME_AREAS,DEVICE_DAYTIME_AREAS,DISTANCE_FROM_HOME,DISTANCE_FROM_PRIMARY_DAYTIME_LOCATION,MEDIAN_DWELL,TOP_SAME_DAY_BRAND,TOP_SAME_MONTH_BRAND,POPULARITY_BY_EACH_HOUR,POPULARITY_BY_HOUR_MONDAY,POPULARITY_BY_HOUR_TUESDAY,POPULARITY_BY_HOUR_WEDNESDAY,POPULARITY_BY_HOUR_THURSDAY,POPULARITY_BY_HOUR_FRIDAY,POPULARITY_BY_HOUR_SATURDAY,POPULARITY_BY_HOUR_SUNDAY,DEVICE_TYPE,ISO_COUNTRY_CODE,REGION,Y,M
0,470370112006,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",6558,4181,"[208,162,223,224,92,215,261,174,260,218,326,15...","[0,0,0,0,0,0,0,0,22,22,39,6,14,26,28,0,7,10,5,...","{""470370112006"":148,""470370153004"":47,""4703701...","{""470370112006"":103,""470370153004"":34,""4703701...","{""470370112006"":15,""470370110022"":4,""470370116...","{""470370112006"":7,""470370110022"":4,""4703701160...","{""470370112006"":15,""470370153004"":4,""470370110...","{""470370112006"":31,""470370153004"":8,""470370110...","{""470370153004"":4,""470370110022"":4,""4703701950...","{""470370112006"":20,""470370153004"":10,""47037011...","{""470370193001"":4,""470370119002"":4,""4703701562...","{""470370112006"":95,""470370195004"":53,""47037019...",3940.0,6356.0,32.066667,"{""Mall"":15,""Dollar Tree"":7,""Kroger"":7,""Walmart...","{""Mall"":64,""Walmart"":40,""Kroger"":39,""Gas Stati...","[9,17,18,21,20,7,7,10,33,44,46,44,50,69,45,9,1...","[137,143,151,147,146,151,142,180,135,189,172,2...","[126,146,150,122,122,151,157,176,245,191,164,2...","[127,141,147,146,152,151,124,175,165,191,211,2...","[126,141,133,139,162,153,142,149,182,242,321,3...","[159,131,131,139,133,148,129,166,161,119,132,1...","[70,83,89,89,77,91,98,116,118,155,93,74,127,17...","[131,127,148,157,156,173,193,214,190,240,300,2...","{""android"":1630,""ios"":1323}",US,TN,2023,06
1,310199694003,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",42090,8019,"[1842,1829,1559,1210,1200,1150,1338,1712,1755,...","[0,10,17,23,51,16,89,56,90,133,116,174,183,106...","{""310199694003"":560,""310199692031"":242,""310199...","{""310199694003"":469,""310199692031"":195,""310199...","{""310199694003"":128,""310199692031"":31,""3101996...","{""310199694003"":104,""310199692031"":27,""3101996...","{""310199694003"":141,""310199692031"":93,""3101996...","{""310199694003"":143,""310199692031"":51,""3101996...","{""310199694003"":6,""310199692031"":4,""3101996920...","{""310199694003"":247,""310199692031"":145,""310199...","{""310199694003"":5,""310199690001"":8,""3101996920...","{""310199694003"":665,""310199692031"":248,""310199...",1988.0,1287.0,10.483333,"{""Hospitals"":22,""Walmart"":21,""Mall"":20,""Gas St...","{""Walmart"":54,""Mall"":53,""Gas Stations"":41,""Hos...","[110,114,124,133,164,167,225,236,230,239,263,3...","[324,345,348,362,356,478,574,739,683,817,824,8...","[334,378,362,398,425,483,582,691,833,869,881,9...","[449,470,423,465,494,593,664,783,797,775,715,8...","[423,477,462,474,503,589,837,964,1068,1007,112...","[485,512,505,516,619,712,896,1085,1245,1110,12...","[391,430,384,427,416,512,487,515,565,624,613,6...","[443,433,415,465,485,512,511,516,594,635,685,6...","{""android"":2740,""ios"":2349}",US,NE,2023,06
2,550939606003,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",33042,4781,"[1594,1195,789,1047,930,927,726,928,1295,1130,...","[0,0,5,42,74,42,23,59,82,145,70,72,81,71,64,11...","{""550939606003"":305,""550939606001"":85,""2704908...","{""550939606003"":197,""550939606001"":54,""2704908...","{""550939606003"":130,""550939606001"":15,""2704908...","{""550939606003"":46,""550939606001"":5,""270490801...","{""550939606003"":70,""550939606001"":22,""27049080...","{""550939606003"":97,""550939606001"":20,""27049080...","{""550939606003"":7,""270490801025"":4,""2704908020...","{""550939606003"":92,""550939606001"":30,""27049080...","{""550939606003"":4,""550939606001"":6,""2704908020...","{""550939606003"":212,""550939607001"":66,""5509396...",5057.0,8137.0,11.366667,"{""Gas Stations"":9,""Kwik Trip"":9,""Walmart"":9,""M...","{""Walmart"":39,""Mall"":37,""Gas Stations"":33,""Kwi...","[83,88,94,135,163,155,129,119,130,181,136,179,...","[452,502,504,557,525,455,475,474,458,482,496,5...","[505,534,510,524,547,473,529,606,553,590,498,5...","[392,449,441,466,485,424,445,419,498,508,484,5...","[530,575,582,639,705,683,702,619,643,743,669,7...","[700,727,755,715,727,761,788,815,846,908,777,7...","[442,446,510,578,630,663,750,756,728,670,638,7...","[550,597,612,640,647,700,794,885,870,800,730,6...","{""android"":2400,""ios"":1295}",US,WI,2023,06
3,450770112042,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",11249,3083,"[280,354,256,461,475,677,451,321,401,479,187,2...","[42,0,0,0,0,0,10,28,16,0,17,14,8,36,11,14,0,51...","{""450770112042"":113,""450730306021"":65,""4507701...","{""450770112042"":93,""450730306021"":53,""45077011...","{""450770112042"":33,""450730306021"":4,""450770112...","{""450770112042"":15,""450730306021"":4,""450770112...","{""450770112042"":22,""450730306021"":9,""450770112...","{""450770112042"":32,""450730306021"":13,""45077011...","{""450770112042"":4,""450730306021"":6,""4507701120...","{""450770112042"":34,""450730306021"":21,""45077011...","{""450770112042"":5,""450770112041"":4,""4507701120...","{""450770112042"":111,""450770111011"":77,""4507701...",6620.0,5274.0,22.566667,"{""Mall"":21,""Walmart"":14,""Universities and Coll...","{""Mall"":67,""Walmart"":56,""Gas Stations"":32,""Ing...","[36,25,27,28,22,28,39,72,22,12,14,16,10,55,37,...","[248,255,255,242,270,294,316,320,239,260,270,2...","[213,269,303,316,330,333,362,431,335,342,341,2...","[226,249,255,269,273,310,361,448,450,429,408,3...","[259,231,243,213,242,286,384,424,462,351,335,3...","[203,230,228,248,243,276,300,455,430,354,333,3...","[184,259,247,247,259,253,294,320,354,362,394,4...","[181,212,240,241,249,245,240,254,229,233,272,2...","{""android"":886,""ios"":997}",US,SC,2023,06
4,060375545222,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",14560,6277,"[313,430,408,450,351,886,451,358,432,808,613,2...","[0,0,0,0,0,14,0,0,0,24,0,36,22,22,0,17,37,0,96...","{""060375545222"":28,""060375545122"":12,""32003006...","{""060375545222"":16,""060375545122"":6,""320030067...","{""060375545222"":4,""060375545212"":9,""0603755452...","{""060375545222"":4,""060375545212"":6,""0603755480...","{""060375545222"":5,""060375700012"":4,""0605911011...","{""060375545222"":4,""060375545122"":12,""320030067...","{""060375545212"":4,""060375545223"":4,""0603755360...","{""060375545222"":5,""060375545122"":4,""0603757000...","{""060375545212"":4,""060375541052"":4,""0603755490...","{""060375545222"":29,""060375545223"":14,""06037554...",9763.0,8771.0,25.483333,"{""Mall"":25,""Macerich"":18,""Starbucks"":10,""Amazo...","{""Mall"":86,""Macerich"":45,""Starbucks"":43,""Walma...","[0,0,0,0,0,20,0,0,0,23,23,59,81,121,97,77,76,5...","[102,101,119,100,93,103,102,116,77,213,396,353...","[123,97,121,117,135,160,121,96,79,258,339,358,...","[74,77,82,73,77,76,122,60,135,139,133,259,409,...","[76,73,78,77,81,102,76,102,132,199,254,213,272...","[60,60,62,57,54,95,117,113,154,220,254,549,533...","[94,116,99,80,102,118,137,123,78,192,295,352,3...","[62,54,60,83,81,94,93,135,133,212,376,388,333,...","{""android"":3534,""ios"":2013}",US,CA,2023,06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20952,060375433211,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",76304,13849,"[2619,2600,2303,1380,3382,2773,2634,2639,2966,...","[0,39,38,24,0,43,60,329,241,138,211,172,158,21...","{""060375433211"":81,""060375433051"":21,""06037543...","{""060375433211"":59,""060375433051"":15,""06037543...","{""060375433211"":10,""060375433212"":4,""060375433...","{""060375433211"":12,""060375433051"":4,""060375433...","{""060375433211"":17,""060375433051"":4,""060375433...","{""060375433211"":16,""060375433212"":4,""060375430...","{""060375433211"":4,""060375433051"":4,""0603754332...","{""060375433211"":31,""060375433051"":10,""06037543...","{""060375433211"":4,""060379800281"":4,""0603760300...","{""060375433211"":142,""060375433051"":34,""0603754...",9998.0,5854.0,55.333333,"{""Universities and Colleges"":24,""Mall"":22,""AEG...","{""Mall"":76,""Walmart"":40,""Universities and Coll...","[162,197,235,240,196,214,221,330,447,467,495,3...","[533,570,604,593,611,805,887,966,1222,1295,149...","[748,703,532,490,490,567,827,1057,1414,1551,16...","[629,664,607,594,568,787,946,1237,1608,1589,16...","[925,988,919,868,725,903,1004,1261,1629,1652,1...","[803,863,789,726,825,1046,1195,1414,1707,1805,...","[570,708,649,667,731,769,822,850,863,900,843,8...","[605,612,571,572,547,571,570,604,730,851,848,7...","{""android"":9581,""ios"":3204}",US,CA,2023,06
20953,390375701021,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",12605,1413,"[241,129,294,223,215,303,204,457,451,513,344,3...","[0,0,0,0,0,0,0,0,15,26,9,0,7,13,39,6,14,0,23,1...","{""390375701021"":183,""390375701023"":39,""3903754...","{""390375701021"":121,""390375701023"":23,""3903754...","{""390375701021"":73,""390375701023"":4,""390375401...","{""390375701021"":32,""390375701023"":4,""390375401...","{""390375701021"":32,""390375701023"":5,""390375401...","{""390375701021"":65,""390375701023"":6,""390375401...","{""390375701021"":9,""390375701023"":18,""390375701...","{""390375701021"":64,""390375701023"":10,""39037540...","{""390375701021"":13,""390375701023"":7,""390375701...","{""390375701021"":133,""390375701023"":30,""3903757...",1672.0,1672.0,20.883333,"{""Golf Courses"":32,""Mall"":16,""Walmart"":11,""Kro...","{""Mall"":41,""Golf Courses"":40,""Walmart"":38,""Kro...","[91,95,86,90,81,77,70,61,56,69,55,43,45,57,65,...","[276,272,250,240,174,206,259,291,280,321,284,2...","[283,270,268,261,242,265,287,296,321,332,303,3...","[330,344,343,340,297,309,268,278,236,247,246,2...","[425,418,421,418,353,382,383,357,396,420,413,3...","[364,372,369,372,329,327,378,315,318,330,347,3...","[277,291,284,280,283,303,345,329,321,315,292,3...","[307,353,322,335,300,316,329,349,368,396,428,4...","{""android"":617,""ios"":378}",US,OH,2023,06
20954,201259509002,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",26633,4948,"[958,1126,1618,804,807,995,980,1092,1460,705,5...","[5,0,0,0,8,8,39,50,85,55,36,28,59,132,41,82,61...","{""201259509002"":151,""201259508001"":93,""2009995...","{""201259509002"":105,""201259508001"":58,""2009995...","{""201259509002"":61,""201259508001"":26,""20099950...","{""201259509002"":23,""201259508001"":9,""200999506...","{""201259509002"":46,""201259508001"":27,""20099950...","{""201259509002"":45,""201259508001"":16,""20099950...","{""201259509002"":4,""201259508001"":14,""401051721...","{""201259509002"":51,""201259508001"":37,""20099950...","{""200999506001"":4,""201259510002"":4,""2012595070...","{""201259509002"":170,""201259510003"":106,""201259...",7476.0,5267.0,20.033333,"{""Walmart"":52,""Gas Stations"":11,""CVR Partners""...","{""Walmart"":75,""Mall"":46,""Gas Stations"":41,""Hos...","[28,39,47,35,54,54,67,87,107,119,87,86,108,204...","[190,192,196,192,214,204,236,338,267,344,336,3...","[154,161,156,178,197,212,263,331,386,476,484,4...","[191,188,177,216,286,254,250,343,380,460,537,5...","[284,315,327,314,354,332,402,495,505,675,639,6...","[306,325,334,336,339,337,353,410,579,616,714,6...","[254,214,249,265,303,302,342,350,395,430,471,5...","[176,203,200,228,223,252,260,311,345,384,375,3...","{""android"":2675,""ios"":1362}",US,KS,2023,06
20955,550790190003,Census Block Group,Census Block Group,2023-06-01 00:00:00.000,2023-07-01 00:00:00.000,"{""Monday"":4,""Tuesday"":4,""Wednesday"":4,""Thursda...",4628,1532,"[142,92,79,77,133,149,173,175,280,127,78,132,1...","[15,0,0,0,0,0,0,0,0,10,0,14,0,0,8,8,17,0,25,31...","{""550790190003"":26,""550790190005"":13,""55079019...","{""550790190003"":17,""550790190005"":9,""550790190...","{""550790190003"":6,""550790190005"":4,""5507901900...","{""550790190003"":4,""550790190005"":4,""5507901900...","{""550790190003"":4,""550790190005"":4,""5507901900...","{""550790190003"":4,""550790190005"":4,""5507901900...","{""550790190001"":4,""550790191002"":4,""5507901910...","{""550790190003"":5,""550790190005"":6,""5507901900...","{""550790190005"":4,""550790190004"":7,""5507918740...","{""550790190003"":14,""550790190001"":8,""550790190...",2130.0,2131.0,40.950000,"{""Mall"":24,""Hospitals"":10,""Pick 'n Save"":8,""Wa...","{""Mall"":56,""Walmart"":50,""Pick 'n Save"":36,""Gas...","[39,42,38,39,41,41,41,42,38,61,41,37,25,24,34,...","[115,133,134,130,147,142,155,162,181,165,158,1...","[68,117,147,127,134,116,130,160,161,165,182,15...","[107,134,157,140,127,128,135,120,166,196,166,1...","[188,201,156,147,126,133,157,162,147,199,195,1...","[174,199,179,142,143,165,188,200,206,196,187,1...","[83,100,114,103,101,103,93,105,145,126,133,188...","[109,140,171,149,130,136,147,146,142,87,65,92,...","{""android"":820,""ios"":369}",US,WI,2023,06


In [20]:
import os
sqlite_fname = os.path.join(saved_dir, f'{year}_{month}.db')
sqlite_fname

'D:\\SafeGraph\\Neighborhood_Patterns_reorganized\\2023_06.db'

In [21]:
def create_CBG_index_table(sqlite_fname):
    conn = sqlite3.connect(sqlite_fname)
    curs = conn.cursor()
     
    # write the CBG index table
    # Convert the dataframe variable "CBG_df" into Sqlite database table "CBG_index", 
    # set the column "area" as the index column.
    CBG_df.set_index('area', inplace=True)
    CBG_df.to_sql('CBG_index', conn, if_exists='replace')
    
    # Close the connection
    conn.close()

# create_CBG_index_table(sqlite_fname)

In [22]:
# Create a table named "OD" if not exist. The columns are: origin, destination, stop. 
# The first two columns should be index. All datatypes are long int.

def create_OD_table(sqlite_fname):
    conn = sqlite3.connect(sqlite_fname)
    cursor = conn.cursor()
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS OD (
        origin INTEGER NOT NULL,
        destination INTEGER NOT NULL,
        stop INTEGER,
        PRIMARY KEY (origin, destination)
    );
    ''')
    
    conn.commit()
    conn.close()

# create_OD_table(sqlite_fname=sqlite_fname)

In [23]:
'''
update the "stop" coloumn in OD table by "origin" and "destination". 
If the record exists, adding the previous value. 
If not exists, insert a new record.
'''

conn = sqlite3.connect(sqlite_fname)


def update_stop_value(origin, destination, stop_increment, conn):
    cursor = conn.cursor()
    cursor.execute('''
    INSERT INTO OD(origin, destination, stop)
    VALUES (?, ?, ?)
    ON CONFLICT(origin, destination)
    DO UPDATE SET stop=stop + ?;
    ''', (origin, destination, stop_increment, stop_increment))



# origin = 1
# destination = 2
# stop_increment = 30
# update_stop_value(origin, destination, stop_increment, conn)

# conn.close()

In [24]:
def remove_all_rows_from_OD(db_path):
    """
    Remove all rows from the "OD" table in the SQLite database.
    
    Parameters:
    - db_path: path to the SQLite database file
    
    Returns:
    - None
    """
    # Connect to the database
    conn = sqlite3.connect(db_path)
    
    # Create a cursor object
    cursor = conn.cursor()
    
    # Execute the DELETE command
    cursor.execute("DELETE FROM OD")
    
    # Commit the transaction
    conn.commit()
    
    # Close the connection
    conn.close()

# Usage:
# db_path = sqlite_fname
# remove_all_rows_from_OD(sqlite_fname)

In [25]:
def create_neighborhood_table(sqlite_fname):
    conn = sqlite3.connect(sqlite_fname)
    curs = conn.cursor()
     
    columns = ['area', 'raw_stop_counts', 'raw_device_counts',  'median_dwell', 'stops_by_each_hour']
    
    np_table_df = np_df[columns]
    # print(np_table_df.columns)
    
    np_table_df.set_index('area', inplace=True)
    np_table_df.to_sql('neighborhood_patterns', conn, if_exists='replace')
    
    # Close the connection
    conn.commit()
    conn.close()

# create_neighborhood_table(sqlite_fname)

In [26]:
def split_home_area_stops():
    remove_all_rows_from_OD(sqlite_fname)

    conn = sqlite3.connect(sqlite_fname)
    for idx, row in tqdm(np_df.iloc[:].iterrows()):
        # stops_by_each_hour = row['stops_by_each_hour']
        # stops_by_each_hour = json.loads(stops_by_each_hour)
        # print(stops_by_each_hour)
        area = row['area']
        raw_device_counts = int(row['raw_device_counts'])
        raw_stop_counts = int(row['raw_stop_counts'])
        stop_per_device = raw_stop_counts / raw_device_counts
        # print(f'area: {area}', f"stop_per_device: {stop_per_device: 0.3f}", )
        
        device_home_areas = row['device_home_areas']
        device_home_areas = json.loads(device_home_areas)
        destination = row['area']
        origins = row['device_home_areas']
        # print(device_home_areas)
        # if origin in device_home_areas.keys():
            # home_device_ratio =  device_home_areas[area] / raw_device_counts
            # print(f"{area} home device count: ", device_home_areas[area], f"{home_device_ratio: 0.3f}")
            # print()
        for index, (origin, stops) in enumerate(device_home_areas.items()):
            try:
                origin_idx = CBG_dict[origin]
                destination_idx = CBG_dict[area]
                update_stop_value(origin=origin_idx, destination=destination_idx, stop_increment=stops, conn=conn)
            except:
                continue
        conn.commit()
    conn.close()

In [27]:
conn = sqlite3.connect(sqlite_fname)
for idx, row in tqdm(np_df.sample(10).iterrows()):
    stops_by_each_hour = row['stops_by_each_hour']
    stops_by_each_hour = json.loads(stops_by_each_hour)
    # print(stops_by_each_hour)
    area = row['area']
    raw_device_counts = int(row['raw_device_counts'])
    raw_stop_counts = int(row['raw_stop_counts'])
    stop_per_device = raw_stop_counts / raw_device_counts
    # print(f'area: {area}', f"stop_per_device: {stop_per_device: 0.3f}", )
    print(f'area: {area}', f"sum of stops_by_each_hour: {sum(stops_by_each_hour)}", )
    print(f'area: {area}', f"raw_stop_counts: {raw_stop_counts}, raw_device_counts: {raw_device_counts}, stop_per_device: {stop_per_device:.1f}", )
    
    device_home_areas = row['device_home_areas']
    device_home_areas = json.loads(device_home_areas)
    destination = row['area']
    origins = row['device_home_areas']
    # print(device_home_areas)
    # print(device_home_areas)
    # if origin in device_home_areas.keys():
        # home_device_ratio =  device_home_areas[area] / raw_device_counts
        # print(f"{area} home device count: ", device_home_areas[area], f"{home_device_ratio: 0.3f}")
        # print()
    for index, (origin, stops) in enumerate(device_home_areas.items()):
        try:
            origin_idx = CBG_dict[origin]
            destination_idx = CBG_dict[area]
            # update_stop_value(origin=origin_idx, destination=destination_idx, stop_increment=stops, conn=conn)
        except:
            continue
    conn.commit()
conn.close()

10it [00:00, 714.67it/s]

area: 260992264001 sum of stops_by_each_hour: 73452
area: 260992264001 raw_stop_counts: 73846, raw_device_counts: 13746, stop_per_device: 5.4
area: 180479698003 sum of stops_by_each_hour: 24942
area: 180479698003 raw_stop_counts: 25212, raw_device_counts: 2297, stop_per_device: 11.0
area: 171770005001 sum of stops_by_each_hour: 14107
area: 171770005001 raw_stop_counts: 14295, raw_device_counts: 2865, stop_per_device: 5.0
area: 360594101004 sum of stops_by_each_hour: 1357
area: 360594101004 raw_stop_counts: 1361, raw_device_counts: 867, stop_per_device: 1.6
area: 120350602142 sum of stops_by_each_hour: 26919
area: 120350602142 raw_stop_counts: 27324, raw_device_counts: 5160, stop_per_device: 5.3
area: 181410115052 sum of stops_by_each_hour: 88390
area: 181410115052 raw_stop_counts: 88668, raw_device_counts: 18467, stop_per_device: 4.8
area: 261635579001 sum of stops_by_each_hour: 9522
area: 261635579001 raw_stop_counts: 9692, raw_device_counts: 2607, stop_per_device: 3.7
area: 350130010




In [28]:
df.columns

Index(['area', 'area_type', 'origin_area_type', 'date_range_start',
       'date_range_end', 'day_counts', 'raw_stop_counts', 'raw_device_counts',
       'stops_by_day', 'stops_by_each_hour', 'device_home_areas',
       'weekday_device_home_areas', 'weekend_device_home_areas',
       'breakfast_device_home_areas', 'lunch_device_home_areas',
       'afternoon_tea_device_home_areas', 'dinner_device_home_areas',
       'nightlife_device_home_areas', 'work_hours_device_home_areas',
       'work_behavior_device_home_areas', 'device_daytime_areas',
       'distance_from_home', 'distance_from_primary_daytime_location',
       'median_dwell', 'top_same_day_brand', 'top_same_month_brand',
       'popularity_by_each_hour', 'popularity_by_hour_monday',
       'popularity_by_hour_tuesday', 'popularity_by_hour_wednesday',
       'popularity_by_hour_thursday', 'popularity_by_hour_friday',
       'popularity_by_hour_saturday', 'popularity_by_hour_sunday',
       'device_type', 'iso_country_code', 're

In [29]:
df[['area', 'raw_stop_counts', 'raw_device_counts',  'median_dwell', 'stops_by_each_hour']].memory_usage(deep=True).sum()  / (2 **30)

0.41149138286709785

## Get ACS data

In [1]:
# ! pip install CensusData
import pandas as pd
import censusdata
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)
pd.set_option('display.precision', 2)

In [53]:
ACS2019_fname = r'G:\covid_mobility_results\new_census_data\ACS_2019_5YR_BG\ACS_race_cbsa_2019.csv'
ACS_2019_df = pd.read_csv(ACS2019_fname)
ACS_2019_df['CBG_FIPS'] = ACS_2019_df['GEOID_Data'].str[-12:]
ACS_2019_df.columns
ACS_2019_df

Unnamed: 0,OBJECTID,STATEFP,COUNTYFP,TRACTCE,BLKGRPCE,INTPTLAT,INTPTLON,Shape_Area,GEOID_Data,county_code,cbg_total_population,p_asian,p_black,p_white,CBSA Code,CSA Code,CBSA Title,Metropolitan/Micropolitan Statistical Area,CSA Title,County/County Equivalent,State Name,FIPS State Code,FIPS County Code,GEOID,total_household_income,total_households,mean_household_income,median_household_income,CBG_FIPS
0,1.0,1,73,5903,3,33.60,-86.68,2.03e+06,15000US010730059033,1073,1991,0.02,0.69,0.29,13820,142.0,"Birmingham-Hoover, AL",Metropolitan Statistical Area,"Birmingham-Hoover-Talladega, AL",Jefferson County,Alabama,1,73,15000US010730059033,3.90e+07,811.0,48069.79,41875,010730059033
1,2.0,1,73,5903,1,33.60,-86.67,4.29e+06,15000US010730059031,1073,2187,0.00,0.45,0.48,13820,142.0,"Birmingham-Hoover, AL",Metropolitan Statistical Area,"Birmingham-Hoover-Talladega, AL",Jefferson County,Alabama,1,73,15000US010730059031,4.97e+07,612.0,81200.48,92649,010730059031
2,3.0,1,73,5905,3,33.61,-86.69,2.42e+06,15000US010730059053,1073,2741,0.00,0.79,0.18,13820,142.0,"Birmingham-Hoover, AL",Metropolitan Statistical Area,"Birmingham-Hoover-Talladega, AL",Jefferson County,Alabama,1,73,15000US010730059053,5.54e+07,867.0,63877.91,48906,010730059053
3,4.0,1,73,5905,1,33.60,-86.70,1.44e+06,15000US010730059051,1073,1213,0.00,0.77,0.23,13820,142.0,"Birmingham-Hoover, AL",Metropolitan Statistical Area,"Birmingham-Hoover-Talladega, AL",Jefferson County,Alabama,1,73,15000US010730059051,2.63e+07,517.0,50961.84,40679,010730059051
4,5.0,1,73,5702,3,33.47,-86.88,7.35e+05,15000US010730057023,1073,412,0.00,0.96,0.04,13820,142.0,"Birmingham-Hoover, AL",Metropolitan Statistical Area,"Birmingham-Hoover-Talladega, AL",Jefferson County,Alabama,1,73,15000US010730057023,8.68e+06,205.0,42353.26,38669,010730057023
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
202620,220133.0,72,47,530600,1,18.26,-66.32,6.03e+06,15000US720475306001,72047,1386,0.00,0.10,0.76,41980,490.0,"San Juan-Bayamón-Caguas, PR",Metropolitan Statistical Area,"San Juan-Bayamón, PR",Corozal Municipio,Puerto Rico,72,47,15000US720475306001,1.22e+07,419.0,29068.95,0,720475306001
202621,220166.0,72,47,530100,2,18.35,-66.33,3.52e+06,15000US720475301002,72047,905,0.00,0.10,0.85,41980,490.0,"San Juan-Bayamón-Caguas, PR",Metropolitan Statistical Area,"San Juan-Bayamón, PR",Corozal Municipio,Puerto Rico,72,47,15000US720475301002,6.03e+06,333.0,18122.84,13302,720475301002
202622,220167.0,72,47,530300,1,18.34,-66.31,3.92e+05,15000US720475303001,72047,643,0.00,0.00,0.96,41980,490.0,"San Juan-Bayamón-Caguas, PR",Metropolitan Statistical Area,"San Juan-Bayamón, PR",Corozal Municipio,Puerto Rico,72,47,15000US720475303001,5.75e+06,225.0,25555.32,0,720475303001
202623,220265.0,72,47,530300,2,18.34,-66.32,6.71e+05,15000US720475303002,72047,1893,0.00,0.16,0.79,41980,490.0,"San Juan-Bayamón-Caguas, PR",Metropolitan Statistical Area,"San Juan-Bayamón, PR",Corozal Municipio,Puerto Rico,72,47,15000US720475303002,8.22e+06,497.0,16549.13,8235,720475303002


In [62]:
import sqlite3
import pandas as pd

def fetch_random_rows_to_dataframe(sqlite_fname):
    """
    Fetch random 10 rows from the table "neighborhood_patterns" in SQLite database,
    and return them as a Pandas DataFrame.

    Parameters:
    - sqlite_fname: path to the SQLite database file

    Returns:
    - DataFrame containing random 10 rows
    """
    # Connect to the database
    conn = sqlite3.connect(sqlite_fname)
    
    # SQL query to fetch random 10 rows
    sql_query = "SELECT * FROM neighborhood_patterns ORDER BY RANDOM() LIMIT 10;"
    
    # Use Pandas to execute the SQL query and fetch the result into a DataFrame
    df = pd.read_sql_query(sql_query, conn)

    # Close the connection
    conn.close()

    return df

# Usage:
# sqlite_fname = 'path_to_your_database_file.db'
random_rows_df = fetch_random_rows_to_dataframe(sqlite_fname)
# print(random_rows_df)

random_rows_df = random_rows_df.merge(ACS_2019_df, left_on='area', right_on='CBG_FIPS')
random_rows_df

Unnamed: 0,area,raw_stop_counts,raw_device_counts,median_dwell,stops_by_each_hour,OBJECTID,STATEFP,COUNTYFP,TRACTCE,BLKGRPCE,INTPTLAT,INTPTLON,Shape_Area,GEOID_Data,county_code,cbg_total_population,p_asian,p_black,p_white,CBSA Code,CSA Code,CBSA Title,Metropolitan/Micropolitan Statistical Area,CSA Title,County/County Equivalent,State Name,FIPS State Code,FIPS County Code,GEOID,total_household_income,total_households,mean_household_income,median_household_income,CBG_FIPS
0,202090426003,15116,3454,14.13,"[12,0,0,14,18,7,7,8,32,49,19,0,16,27,38,100,34...",77416.0,20,209,42600,3,39.08,-94.63,928000.0,15000US202090426003,20209,752,0.00532,0.00266,0.84,28140,312.0,"Kansas City, MO-KS",Metropolitan Statistical Area,"Kansas City-Overland Park-Kansas City, MO-KS",Wyandotte County,Kansas,20,209,15000US202090426003,9980000.0,218.0,45767.9,27333,202090426003
1,220170237003,13504,2643,19.08,"[13,0,6,0,16,0,57,44,16,8,73,16,21,42,27,71,47...",82916.0,22,17,23700,3,32.43,-93.77,2020000.0,15000US220170237003,22017,2114,0.0,0.97,0.02,43340,508.0,"Shreveport-Bossier City, LA",Metropolitan Statistical Area,"Shreveport-Bossier City-Minden, LA",Caddo Parish,Louisiana,22,17,15000US220170237003,14700000.0,599.0,24482.07,24306,220170237003
2,260750054001,10619,1992,18.98,"[0,0,0,0,0,39,30,35,8,37,22,11,42,0,49,7,12,10...",96033.0,26,75,5400,1,42.24,-84.51,13200000.0,15000US260750054001,26075,1585,0.0,0.0,1.0,27100,,"Jackson, MI",Metropolitan Statistical Area,,Jackson County,Michigan,26,75,15000US260750054001,45500000.0,478.0,95125.09,79167,260750054001
3,340130049002,3984,719,121.97,"[0,0,0,0,0,0,0,0,6,0,0,17,12,41,20,0,15,0,12,1...",121020.0,34,13,4900,2,40.71,-74.21,105000.0,15000US340130049002,34013,965,0.0,1.0,0.0,35620,408.0,"New York-Newark-Jersey City, NY-NJ-PA",Metropolitan Statistical Area,"New York-Newark, NY-NJ-CT-PA",Essex County,New Jersey,34,13,15000US340130049002,19200000.0,381.0,50366.97,34946,340130049002
4,270270301041,6409,995,69.27,"[0,0,0,0,0,0,0,0,0,0,0,18,0,8,6,22,31,26,0,0,9...",106038.0,27,27,30104,1,46.69,-96.67,239000000.0,15000US270270301041,27027,622,0.00643,0.00322,0.96,22020,244.0,"Fargo, ND-MN",Metropolitan Statistical Area,"Fargo-Wahpeton, ND-MN",Clay County,Minnesota,27,27,15000US270270301041,21500000.0,258.0,83303.81,78750,270270301041
5,360810865001,21416,5635,9.88,"[0,0,0,0,0,0,55,32,115,123,77,96,92,95,39,60,9...",128215.0,36,81,86500,1,40.76,-73.83,79900.0,15000US360810865001,36081,2001,0.691,0.0395,0.22,35620,408.0,"New York-Newark-Jersey City, NY-NJ-PA",Metropolitan Statistical Area,"New York-Newark, NY-NJ-CT-PA",Queens County,New York,36,81,15000US360810865001,55200000.0,1096.0,50341.74,31250,360810865001
6,391535318013,11343,2548,42.7,"[0,0,0,0,0,25,11,37,31,15,21,17,16,11,18,12,28...",152163.0,39,153,531801,3,40.99,-81.54,1870000.0,15000US391535318013,39153,1035,0.0744,0.0184,0.89,10420,184.0,"Akron, OH",Metropolitan Statistical Area,"Cleveland-Akron-Canton, OH",Summit County,Ohio,39,153,15000US391535318013,38700000.0,519.0,74551.57,67266,391535318013
7,211110117091,25159,2984,48.63,"[10,16,0,9,15,11,102,48,94,43,48,48,19,32,94,1...",81508.0,21,111,11709,1,38.09,-85.61,15400000.0,15000US211110117091,21111,2331,0.0,0.048,0.93,31140,350.0,"Louisville/Jefferson County, KY-IN",Metropolitan Statistical Area,Louisville/Jefferson County--Elizabethtown--Ba...,Jefferson County,Kentucky,21,111,15000US211110117091,72100000.0,893.0,80685.42,73365,211110117091


In [100]:
def show_CBG_samples(sample_df):
    for idx, row in tqdm(sample_df.iterrows()):
        stops_by_each_hour = row['stops_by_each_hour']
        # stops_by_each_hour = json.loads(stops_by_each_hour)
        # print(stops_by_each_hour)
        area = row['area']
        raw_device_counts = int(row['raw_device_counts'])
        raw_stop_counts = int(row['raw_stop_counts'])
        stop_per_device = raw_stop_counts / raw_device_counts
        # print(f'area: {area}', f"stop_per_device: {stop_per_device: 0.3f}", )
        sampling_rate = row['raw_device_counts'] / row['cbg_total_population']
        print(f'area: {area}', f"sum of stops_by_each_hour: {sum(stops_by_each_hour)}, ",
              f"sampling_rate: {sampling_rate:0.3f}")
        print(f'area: {area}', f"raw_stop_counts: {raw_stop_counts}, ",
              f"raw_device_counts: {raw_device_counts},",
              f"stop_per_device: {stop_per_device:.1f}", 
               f"cbg_total_population:", row['cbg_total_population'])
        print()
        # device_home_areas = row['device_home_areas']
        # device_home_areas = json.loads(device_home_areas)
        # destination = row['area']
        # origins = row['device_home_areas']

random_rows_df = fetch_random_rows_to_dataframe(sqlite_fname)
random_rows_df['stops_by_each_hour'] = random_rows_df['stops_by_each_hour'].apply(json.loads)
random_rows_df = random_rows_df.merge(ACS_2019_df, left_on='area', right_on='CBG_FIPS')
show_CBG_samples(random_rows_df)

10it [00:00, 1342.74it/s]

area: 090091803001 sum of stops_by_each_hour: 13232,  sampling_rate: 4.056
area: 090091803001 raw_stop_counts: 13499,  raw_device_counts: 3062, stop_per_device: 4.4 cbg_total_population: 755

area: 421314002001 sum of stops_by_each_hour: 10597,  sampling_rate: 1.307
area: 421314002001 raw_stop_counts: 10645,  raw_device_counts: 1472, stop_per_device: 7.2 cbg_total_population: 1126

area: 121270906001 sum of stops_by_each_hour: 22765,  sampling_rate: 3.890
area: 121270906001 raw_stop_counts: 23095,  raw_device_counts: 4466, stop_per_device: 5.2 cbg_total_population: 1148

area: 211379201023 sum of stops_by_each_hour: 14218,  sampling_rate: 0.908
area: 211379201023 raw_stop_counts: 14390,  raw_device_counts: 1486, stop_per_device: 9.7 cbg_total_population: 1636

area: 220550021013 sum of stops_by_each_hour: 11783,  sampling_rate: 3.181
area: 220550021013 raw_stop_counts: 12019,  raw_device_counts: 3222, stop_per_device: 3.7 cbg_total_population: 1013

area: 132450015002 sum of stops_by_e




In [15]:
import sqlite3
import pandas as pd

def fetch_all_OD_to_dataframe(sqlite_fname):
   
    # Connect to the database
    conn = sqlite3.connect(sqlite_fname)
     
    sql_query = "SELECT * FROM OD limit 10"
    
    # Use Pandas to execute the SQL query and fetch the result into a DataFrame
    df = pd.read_sql_query(sql_query, conn)

    # Close the connection
    conn.close()

    return df

sqlite_fname = os.path.join(saved_dir, f'2023_06.db')
print(sqlite_fname)
conn = sqlite3.connect(sqlite_fname)
 
sql_query = "SELECT * FROM OD;"
# Close the connection

# Use Pandas to execute the SQL query and fetch the result into a DataFrame
OD_df = pd.read_sql_query(sql_query, conn)
cursor = conn.cursor()

# cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")

# Fetch the rows and extract table names
# tables = [table[0] for table in cursor.fetchall()]

conn.close()


OD_df
# tables


D:\SafeGraph\Neighborhood_Patterns_reorganized\2023_06.db


Unnamed: 0,origin,destination,stop
0,217823,217760,6
1,217757,217760,4
2,217821,217760,4
3,217828,217760,4
4,217823,217749,6
...,...,...,...
185534602,57944,58222,4
185534603,58146,58222,4
185534604,58098,58222,4
185534605,118432,58222,4


In [9]:
fetch_all_OD_to_dataframe(sqlite_fname)

DatabaseError: Execution failed on sql 'SELECT * FROM OD': no such table: OD

In [48]:

# CBGs = censusdata.download('acs5', 2015,
#                              censusdata.censusgeo([('state', '17'), ('county', '031'), ('block group', '*')]),
#                              ['B23025_003E', 'B23025_005E', 'B15003_001E', 'B15003_002E', 'B15003_003E',
#                               'B15003_004E', 'B15003_005E', 'B15003_006E', 'B15003_007E', 'B15003_008E',
#                               'B15003_009E', 'B15003_010E', 'B15003_011E', 'B15003_012E', 'B15003_013E',
#                               'B15003_014E', 'B15003_015E', 'B15003_016E'])

# CBGs

# 3d iterative proportional fitting

https://edyhsgr.github.io/IPFDescription/AKDOLWDIPFTHREED.pdf

https://u.demog.berkeley.edu/~eddieh/IPFDescription/AKDOLWDIPFTWOD.pdf

# Get the visitors of each CBG

In [49]:
import numpy as np
from netCDF4 import Dataset

# Create a sample matrix (for the sake of demonstration)
matrix = np.random.rand(220000, 220000 )
matrix

MemoryError: Unable to allocate 361. GiB for an array with shape (220000, 220000) and data type float64

In [None]:


# Create a new netCDF file
rootgrp = Dataset("matrix.nc", "w", format="NETCDF4")

# Create dimensions for the matrix
dim_x = rootgrp.createDimension("x", 220684)
dim_y = rootgrp.createDimension("y", 220684)

# Create a variable in the file for the matrix and set its data
matrix_var = rootgrp.createVariable("matrix", "f8", ("x", "y"), zlib=True, complevel=4) # "f8" is for double precision float
matrix_var[:, :] = matrix

# Close the netCDF file
rootgrp.close()

In [31]:
df2 = pd.read_csv(r'K:\SafeGraph\Advan_2023_API\Neighborhood_Patterns\2023\05\01\data_01af1467-0604-be6e-0043-0b8700e85436_13_7_0.csv.gz')
df2

Unnamed: 0,AREA,AREA_TYPE,ORIGIN_AREA_TYPE,DATE_RANGE_START,DATE_RANGE_END,DAY_COUNTS,RAW_STOP_COUNTS,RAW_DEVICE_COUNTS,STOPS_BY_DAY,STOPS_BY_EACH_HOUR,DEVICE_HOME_AREAS,WEEKDAY_DEVICE_HOME_AREAS,WEEKEND_DEVICE_HOME_AREAS,BREAKFAST_DEVICE_HOME_AREAS,LUNCH_DEVICE_HOME_AREAS,DINNER_DEVICE_HOME_AREAS,NIGHTLIFE_DEVICE_HOME_AREAS,WORK_HOURS_DEVICE_HOME_AREAS,WORK_BEHAVIOR_DEVICE_HOME_AREAS,DEVICE_DAYTIME_AREAS,DISTANCE_FROM_HOME,DISTANCE_FROM_PRIMARY_DAYTIME_LOCATION,MEDIAN_DWELL,TOP_SAME_DAY_BRAND,TOP_SAME_MONTH_BRAND,POPULARITY_BY_EACH_HOUR,POPULARITY_BY_HOUR_MONDAY,POPULARITY_BY_HOUR_TUESDAY,POPULARITY_BY_HOUR_WEDNESDAY,POPULARITY_BY_HOUR_THURSDAY,POPULARITY_BY_HOUR_FRIDAY,POPULARITY_BY_HOUR_SATURDAY,POPULARITY_BY_HOUR_SUNDAY,DEVICE_TYPE,ISO_COUNTRY_CODE,REGION,Y,M
0,290470205003,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",14830,1766,"""[321,321,492,471,636,344,366,416,708,377,495,...","""[0,0,0,0,0,0,11,11,0,0,17,25,38,27,40,0,29,23...","""{\""290470205003\"":140,\""290470202013\"":12,\""2...","""{\""290470205003\"":113,\""290470202013\"":6,\""29...","""{\""290470205003\"":53,\""290470202013\"":4,\""291...","""{\""290470205003\"":44,\""291650303082\"":4,\""291...","""{\""290470205003\"":30,\""290470202013\"":4,\""291...","""{\""290470205003\"":51,\""290470202013\"":5,\""291...","""{\""290470205003\"":9,\""290470202013\"":5,\""2909...","""{\""290470205003\"":57,\""290470202013\"":5,\""291...","""{\""290470205003\"":18,\""290950166001\"":4,\""290...","""{\""290470205003\"":155,\""291650306001\"":9,\""20...",27,32,85.016667,"""{\""Mall\"":24,\""Walmart\"":17,\""Gas Stations\"":...","""{\""Mall\"":55,\""Walmart\"":43,\""Gas Stations\"":...","""[85,87,83,84,84,87,103,118,94,102,100,109,131...","""[514,509,541,547,594,613,668,680,541,571,597,...","""[643,651,640,623,697,722,763,719,682,653,651,...","""[576,638,653,612,692,706,781,808,751,634,676,...","""[384,398,414,425,467,506,525,552,416,441,494,...","""[480,480,479,495,569,625,671,710,605,554,555,...","""[412,424,430,420,424,442,414,453,411,428,382,...","""[463,512,492,543,519,524,580,566,596,607,572,...","""{\""android\"":1104,\""ios\"":423}""",US,MO,2023,5
1,291892122007,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",9277,2702,"""[237,329,255,196,352,237,350,116,382,345,227,...","""[0,0,0,0,0,0,0,0,12,0,0,15,11,26,13,49,8,38,2...","""{\""291892122007\"":47,\""291892108042\"":20,\""29...","""{\""291892122007\"":31,\""291892108042\"":14,\""29...","""{\""291892122007\"":11,\""291892121022\"":4,\""291...","""{\""291892122007\"":6,\""291892108042\"":4,\""2918...","""{\""291892122007\"":9,\""291892108042\"":4,\""2951...","""{\""291892122007\"":11,\""291892121022\"":4,\""291...","""{\""291892121014\"":4,\""291892122004\"":4,\""2918...","""{\""291892122007\"":17,\""291892108042\"":7,\""291...","""{\""291892122007\"":4,\""291892122001\"":4,\""2951...","""{\""291892122007\"":39,\""295101269003\"":26,\""29...",930,491,44.983333,"""{\""Mall\"":19,\""Universities and Colleges\"":9,...","""{\""Mall\"":67,\""Walmart\"":46,\""Gas Stations\"":...","""[49,53,47,49,54,55,53,57,54,41,65,78,82,68,48...","""[194,212,184,213,208,208,239,249,266,248,255,...","""[188,228,221,222,264,285,307,329,319,389,376,...","""[196,197,195,208,247,313,299,319,341,354,378,...","""[170,160,143,169,254,215,276,275,291,286,258,...","""[118,123,126,133,180,192,188,225,253,250,286,...","""[130,127,143,144,158,170,223,240,210,275,301,...","""[142,176,172,156,179,201,214,228,239,274,258,...","""{\""android\"":2181,\""ios\"":373}""",US,MO,2023,5
2,291892180033,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",24150,4625,"""[564,776,986,627,859,1030,896,394,966,812,895...","""[18,0,0,0,8,0,26,22,18,17,0,21,50,37,102,10,4...","""{\""291892180033\"":536,\""291892179422\"":121,\""...","""{\""291892180033\"":357,\""291892179422\"":85,\""2...","""{\""291892180033\"":169,\""291892179422\"":18,\""2...","""{\""291892180033\"":61,\""291892179422\"":4,\""291...","""{\""291892180033\"":125,\""291892179422\"":21,\""2...","""{\""291892180033\"":131,\""291892179422\"":29,\""2...","""{\""291892180033\"":5,\""291892179422\"":14,\""291...","""{\""291892180033\"":156,\""291892179422\"":49,\""2...","""{\""291892180033\"":25,\""291892179422\"":4,\""291...","""{\""291892180033\"":380,\""291892179422\"":68,\""2...",10,11,32.150000,"""{\""Mall\"":26,\""Schnucks\"":16,\""Walmart\"":12,\...","""{\""Mall\"":69,\""Walmart\"":46,\""Schnucks\"":36,\...","""[90,88,88,96,104,106,102,107,92,83,61,75,100,...","""[583,596,582,584,621,687,742,771,729,714,650,...","""[626,645,666,648,702,773,739,829,715,739,784,...","""[649,658,653,687,747,800,844,868,713,670,646,...","""[630,654,653,671,710,803,787,912,832,829,852,...","""[618,654,694,715,787,760,775,784,831,838,828,...","""[471,491,528,512,515,619,671,713,793,749,709,...","""[522,548,568,594,592,634,641,728,732,753,813,...","""{\""android\"":2057,\""ios\"":1391}""",US,MO,2023,5
3,295101192001,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",6063,1524,"""[329,264,215,245,183,230,150,82,230,150,85,29...","""[20,7,4,12,17,33,0,9,8,27,18,34,26,40,6,9,0,0...","""{\""295101192001\"":32,\""295101111002\"":9,\""291...","""{\""295101192001\"":20,\""295101111002\"":5,\""291...","""{\""295101192001\"":17,\""295101111002\"":4,\""291...","""{\""295101192001\"":9,\""295101111002\"":4,\""2918...","""{\""295101192001\"":10,\""295101111002\"":4,\""291...","""{\""295101192001\"":11,\""291892123001\"":4,\""291...","""{\""295101192001\"":4,\""295101111002\"":4,\""2951...","""{\""295101192001\"":12,\""295101111002\"":4,\""291...","""{\""295101192001\"":4,\""295101111002\"":5,\""3903...","""{\""295101192001\"":39,\""295101186001\"":7,\""295...",16,16,73.416667,"""{\""Universities and Colleges\"":25,\""Mall\"":15...","""{\""Mall\"":66,\""Universities and Colleges\"":51...","""[38,45,48,36,36,37,26,39,35,51,34,39,34,45,32...","""[243,258,259,230,237,224,199,227,221,229,227,...","""[237,251,237,253,241,208,213,208,258,250,293,...","""[296,308,335,340,316,276,293,296,276,251,192,...","""[201,221,228,224,216,174,199,145,165,176,158,...","""[211,209,217,237,238,197,211,180,175,166,176,...","""[213,211,238,229,256,257,258,253,248,240,261,...","""[175,198,202,197,196,199,236,227,203,233,208,...","""{\""android\"":984,\""ios\"":339}""",US,MO,2023,5
4,280890301072,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",47343,13162,"""[1457,1557,1767,1756,1820,1888,1658,855,1801,...","""[81,11,0,0,46,28,46,112,45,0,19,16,83,118,115...","""{\""280890301072\"":401,\""280890304002\"":206,\""...","""{\""280890301072\"":278,\""280890304002\"":120,\""...","""{\""280890301072\"":131,\""280890304002\"":31,\""2...","""{\""280890301072\"":48,\""280890304002\"":4,\""281...","""{\""280890301072\"":72,\""280890304002\"":36,\""28...","""{\""280890301072\"":127,\""280890304002\"":38,\""2...","""{\""280890301072\"":4,\""281210203023\"":15,\""280...","""{\""280890301072\"":146,\""280890304002\"":57,\""2...","""{\""280890301072\"":4,\""280890304002\"":5,\""2808...","""{\""280890301072\"":353,\""281210202111\"":187,\""...",3609,4109,9.516667,"""{\""Mall\"":32,\""Walmart\"":12,\""Kroger\"":10,\""G...","""{\""Mall\"":80,\""Walmart\"":55,\""Kroger\"":42,\""G...","""[164,167,164,200,237,198,204,235,205,180,198,...","""[546,584,636,673,682,773,789,916,740,716,771,...","""[747,782,830,866,970,954,887,1103,855,832,753...","""[818,863,905,903,1002,1049,1081,1064,968,806,...","""[638,684,685,739,849,866,854,848,649,615,613,...","""[522,529,616,664,690,698,687,618,613,598,705,...","""[584,600,616,649,634,713,737,819,798,869,1002...","""[598,580,587,556,621,632,766,836,853,901,920,...","""{\""android\"":3348,\""ios\"":4106}""",US,MS,2023,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22025,181410034003,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",6863,1426,"""[196,240,242,169,142,208,183,146,240,401,192,...","""[0,0,8,0,0,12,0,8,0,9,35,8,0,10,0,17,14,0,25,...","""{\""181410034003\"":15,\""181410034002\"":8,\""181...","""{\""181410034003\"":10,\""181410034002\"":6,\""181...","""{\""181410034003\"":4,\""181410027001\"":4,\""1814...","""{\""181410034003\"":4,\""181410034004\"":4,\""1814...","""{\""181410034003\"":4,\""181410027001\"":4,\""1814...","""{\""181410034003\"":4,\""181410032003\"":4,\""1814...","""{\""181410027001\"":4,\""181410033003\"":4,\""1814...","""{\""181410034003\"":5,\""181410034002\"":4,\""1814...","""{\""181410004001\"":4,\""181410007002\"":4}""","""{\""181410034003\"":17,\""181410034002\"":6,\""181...",65,465,93.800000,"""{\""Mall\"":19,\""Walmart\"":13,\""Gas Stations\"":...","""{\""Mall\"":67,\""Walmart\"":47,\""McDonald's\"":35...","""[97,102,111,111,110,131,114,110,110,119,130,1...","""[233,229,248,256,246,273,256,240,242,255,271,...","""[244,272,238,238,245,243,254,243,271,273,202,...","""[210,242,228,245,239,238,274,243,245,243,242,...","""[170,182,210,176,175,166,209,187,207,167,208,...","""[161,165,162,163,138,125,157,158,191,157,152,...","""[138,144,153,150,162,169,202,216,218,222,223,...","""[228,239,259,226,241,227,226,230,257,281,265,...","""{\""android\"":1160,\""ios\"":151}""",US,IN,2023,5
22026,180859619003,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",14485,3174,"""[421,311,525,445,292,406,245,155,422,495,451,...","""[0,7,9,13,0,23,0,44,56,21,28,48,34,22,33,22,2...","""{\""180859619003\"":41,\""180859623002\"":31,\""18...","""{\""180859619003\"":34,\""180859623002\"":26,\""18...","""{\""180859619003\"":13,\""180859623002\"":9,\""180...","""{\""180859619003\"":10,\""180859623002\"":7,\""180...","""{\""180859619003\"":12,\""180859623002\"":7,\""180...","""{\""180859619003\"":6,\""180859623002\"":6,\""1808...","""{\""180859617003\"":4,\""180859620001\"":8,\""1808...","""{\""180859619003\"":20,\""180859623002\"":20,\""18...","""{\""180859623001\"":4,\""180859616001\"":4,\""3904...","""{\""180859619003\"":54,\""180859618004\"":44,\""18...",4924,4238,6.083333,"""{\""Mall\"":31,\""Walmart\"":17,\""Gas Stations\"":...","""{\""Mall\"":75,\""Walmart\"":61,\""Gas Stations\"":...","""[15,16,15,25,26,38,18,56,91,37,52,69,59,25,47...","""[107,124,123,138,128,150,163,269,328,264,281,...","""[97,103,121,146,120,156,127,330,327,342,315,2...","""[89,164,166,206,147,234,230,413,264,260,260,2...","""[73,124,151,141,158,178,222,321,280,188,230,2...","""[105,100,155,168,168,190,168,263,177,215,205,...","""[85,77,82,103,111,131,186,158,172,181,166,182...","""[68,82,102,103,104,93,118,168,159,188,187,154...","""{\""android\"":1474,\""ios\"":978}""",US,IN,2023,5
22027,180118104003,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",5815,1822,"""[151,264,234,221,298,268,134,98,140,320,230,1...","""[0,0,0,0,0,0,13,26,22,0,10,0,0,8,6,13,6,31,9,...","""{\""180118104003\"":145,\""180118105003\"":111,\""...","""{\""180118104003\"":101,\""180118105003\"":80,\""1...","""{\""180118104003\"":53,\""180118105003\"":10,\""18...","""{\""180118104003\"":21,\""180118105003\"":7,\""180...","""{\""180118104003\"":31,\""180118105003\"":15,\""18...","""{\""180118104003\"":36,\""180118105003\"":20,\""18...","""{\""180118105003\"":10,\""180118103003\"":4,\""180...","""{\""180118104003\"":44,\""180118105003\"":44,\""18...","""{\""180118104003\"":4,\""180118105003\"":9,\""1801...","""{\""180118105003\"":141,\""180118104003\"":94,\""1...",1846,1928,12.983333,"""{\""Walmart\"":18,\""Mall\"":15,\""Kroger\"":12,\""W...","""{\""Walmart\"":62,\""Mall\"":52,\""Kroger\"":37,\""M...","""[26,18,30,52,40,28,42,56,58,47,49,45,45,37,30...","""[157,158,174,198,186,160,170,189,185,151,180,...","""[152,190,200,195,201,200,266,200,195,204,180,...","""[168,172,159,181,159,164,185,180,139,126,168,...","""[155,184,198,206,219,204,188,157,125,151,151,...","""[117,142,137,145,153,152,165,175,199,187,117,...","""[158,165,136,135,126,133,165,192,238,228,224,...","""[179,206,204,196,191,208,242,240,239,328,219,...","""{\""android\"":718,\""ios\"":589}""",US,IN,2023,5
22028,180419545002,Census Block Group,Census Block Group,2023-05-01 00:00:00.000,2023-06-01 00:00:00.000,"""{\""Monday\"":5,\""Tuesday\"":5,\""Wednesday\"":5,\...",19776,4152,"""[604,651,507,400,527,469,560,597,656,659,685,...","""[35,17,12,0,0,0,10,70,18,53,32,34,79,36,26,17...","""{\""180419545002\"":92,\""180419543001\"":66,\""18...","""{\""180419545002\"":70,\""180419543001\"":32,\""18...","""{\""180419545002\"":46,\""180419543001\"":30,\""18...","""{\""180419545002\"":21,\""180419543001\"":5,\""180...","""{\""180419545002\"":26,\""180419543001\"":18,\""18...","""{\""180419545002\"":48,\""180419543001\"":16,\""18...","""{\""180419545002\"":4,\""180419544004\"":13,\""180...","""{\""180419545002\"":45,\""180419543001\"":18,\""18...","""{\""180419542001\"":4,\""180419545004\"":10,\""180...","""{\""180419545002\"":99,\""180419541002\"":79,\""18...",2709,2510,7.850000,"""{\""Mall\"":28,\""Walmart\"":22,\""Speedway\"":11,\...","""{\""Mall\"":75,\""Walmart\"":65,\""Kroger\"":37,\""S...","""[45,39,37,44,38,42,51,101,60,92,81,76,119,49,...","""[71,70,112,103,109,91,133,284,258,392,380,476...","""[91,100,129,139,169,143,174,276,337,375,467,4...","""[123,168,162,113,151,142,220,277,330,344,349,...","""[142,138,144,146,144,171,214,260,304,277,361,...","""[120,158,128,142,143,165,165,325,314,356,286,...","""[115,113,137,114,136,123,131,145,276,429,464,...","""[115,119,108,107,120,122,128,133,186,128,171,...","""{\""android\"":2714,\""ios\"":552}""",US,IN,2023,5


In [33]:
df2.columns

Index(['AREA', 'AREA_TYPE', 'ORIGIN_AREA_TYPE', 'DATE_RANGE_START',
       'DATE_RANGE_END', 'DAY_COUNTS', 'RAW_STOP_COUNTS', 'RAW_DEVICE_COUNTS',
       'STOPS_BY_DAY', 'STOPS_BY_EACH_HOUR', 'DEVICE_HOME_AREAS',
       'WEEKDAY_DEVICE_HOME_AREAS', 'WEEKEND_DEVICE_HOME_AREAS',
       'BREAKFAST_DEVICE_HOME_AREAS', 'LUNCH_DEVICE_HOME_AREAS',
       'DINNER_DEVICE_HOME_AREAS', 'NIGHTLIFE_DEVICE_HOME_AREAS',
       'WORK_HOURS_DEVICE_HOME_AREAS', 'WORK_BEHAVIOR_DEVICE_HOME_AREAS',
       'DEVICE_DAYTIME_AREAS', 'DISTANCE_FROM_HOME',
       'DISTANCE_FROM_PRIMARY_DAYTIME_LOCATION', 'MEDIAN_DWELL',
       'TOP_SAME_DAY_BRAND', 'TOP_SAME_MONTH_BRAND', 'POPULARITY_BY_EACH_HOUR',
       'POPULARITY_BY_HOUR_MONDAY', 'POPULARITY_BY_HOUR_TUESDAY',
       'POPULARITY_BY_HOUR_WEDNESDAY', 'POPULARITY_BY_HOUR_THURSDAY',
       'POPULARITY_BY_HOUR_FRIDAY', 'POPULARITY_BY_HOUR_SATURDAY',
       'POPULARITY_BY_HOUR_SUNDAY', 'DEVICE_TYPE', 'ISO_COUNTRY_CODE',
       'REGION', 'Y', 'M'],
      dtype='obje

In [None]:
['AREA', 'AREA_TYPE', 'ORIGIN_AREA_TYPE', 'DATE_RANGE_START',
       'DATE_RANGE_END', 'DAY_COUNTS', 'RAW_STOP_COUNTS', 'RAW_DEVICE_COUNTS',
       'STOPS_BY_DAY', 'STOPS_BY_EACH_HOUR', 'DEVICE_HOME_AREAS',
       'WEEKDAY_DEVICE_HOME_AREAS', 'WEEKEND_DEVICE_HOME_AREAS',
       'BREAKFAST_DEVICE_HOME_AREAS', 'LUNCH_DEVICE_HOME_AREAS',
       'DINNER_DEVICE_HOME_AREAS', 'NIGHTLIFE_DEVICE_HOME_AREAS',
       'WORK_HOURS_DEVICE_HOME_AREAS', 'WORK_BEHAVIOR_DEVICE_HOME_AREAS',
       'DEVICE_DAYTIME_AREAS', 'DISTANCE_FROM_HOME',
       'DISTANCE_FROM_PRIMARY_DAYTIME_LOCATION', 'MEDIAN_DWELL',
       'TOP_SAME_DAY_BRAND', 'TOP_SAME_MONTH_BRAND', 'POPULARITY_BY_EACH_HOUR',
       'POPULARITY_BY_HOUR_MONDAY', 'POPULARITY_BY_HOUR_TUESDAY',
       'POPULARITY_BY_HOUR_WEDNESDAY', 'POPULARITY_BY_HOUR_THURSDAY',
       'POPULARITY_BY_HOUR_FRIDAY', 'POPULARITY_BY_HOUR_SATURDAY',
       'POPULARITY_BY_HOUR_SUNDAY', 'DEVICE_TYPE', 'ISO_COUNTRY_CODE',
       'REGION', 'Y', 'M']

In [73]:
df['STOPS_BY_EACH_HOUR'] = df['STOPS_BY_EACH_HOUR'].apply(json.loads) 
df['STOPS_BY_EACH_HOUR'] = df['STOPS_BY_EACH_HOUR'].apply(json.loads) 

In [66]:
df['STOPS_BY_EACH_HOUR']

0        [0,0,0,0,0,0,0,0,0,20,0,0,0,0,0,0,30,5,0,0,14,...
1        [0,0,0,0,7,23,0,23,0,7,26,15,12,15,21,17,11,19...
2        [5,0,0,0,19,7,13,0,23,0,10,7,15,5,0,0,7,0,0,13...
3        [0,0,0,0,0,0,0,0,19,16,0,0,0,0,14,0,0,16,0,0,0...
4        [0,0,0,0,0,0,0,5,0,14,24,14,0,17,34,0,0,19,0,0...
                               ...                        
20243    [16,9,0,21,16,0,27,27,47,45,45,89,48,50,11,105...
20244    [46,25,44,0,46,27,71,66,92,54,87,59,137,125,43...
20245    [77,0,0,0,0,0,12,33,30,40,34,34,46,0,48,48,44,...
20246    [0,0,0,0,0,0,0,0,0,6,8,0,0,0,0,0,13,0,7,0,14,0...
20247    [0,0,0,0,0,24,36,68,104,117,66,10,74,48,92,57,...
Name: STOPS_BY_EACH_HOUR, Length: 220692, dtype: object

In [80]:
# df.iloc[6]['STOPS_BY_EACH_HOUR']

In [78]:
len(df.iloc[6]['STOPS_BY_EACH_HOUR'])

744

In [72]:
json.loads(df.iloc[0]['STOPS_BY_EACH_HOUR'])

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 20,
 0,
 0,
 0,
 0,
 0,
 0,
 30,
 5,
 0,
 0,
 14,
 6,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 11,
 0,
 13,
 0,
 5,
 0,
 14,
 0,
 8,
 5,
 0,
 0,
 15,
 0,
 0,
 0,
 0,
 0,
 0,
 19,
 0,
 33,
 7,
 0,
 0,
 0,
 0,
 0,
 22,
 0,
 22,
 12,
 22,
 0,
 0,
 14,
 7,
 0,
 0,
 25,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 15,
 8,
 0,
 22,
 8,
 23,
 0,
 15,
 35,
 0,
 21,
 0,
 13,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 6,
 0,
 0,
 13,
 0,
 0,
 0,
 0,
 0,
 19,
 15,
 0,
 0,
 24,
 8,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 5,
 5,
 21,
 10,
 14,
 10,
 0,
 0,
 0,
 13,
 10,
 18,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 28,
 7,
 0,
 0,
 0,
 21,
 58,
 7,
 11,
 0,
 0,
 0,
 6,
 0,
 0,
 0,
 0,
 8,
 0,
 0,
 0,
 0,
 0,
 34,
 39,
 10,
 6,
 20,
 0,
 0,
 14,
 0,
 8,
 0,
 29,
 7,
 0,
 0,
 0,
 5,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 8,
 15,
 0,
 0,
 14,
 0,
 13,
 11,
 37,
 13,
 0,
 0,
 14,
 5,
 0,
 21,
 0,
 0,
 0,
 0,
 11,
 0,
 0,
 10,
 10,
 30,
 0,
 0,
 15,
 0,
 0,
 15,
 21,
 15,
 0,
 

In [68]:
len(json.loads())

1746