# Load data and packages

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import pandas as pd
pd.options.display.max_rows = 30
import matplotlib.pyplot as plt #if using matplotlib
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import numpy as np
import dbfread
from dbfread import DBF
import linearmodels
from linearmodels import IV2SLS, IVLIML, IVGMM, IVGMMCUE
import statsmodels.api as sm
import csv
import seaborn as sns

In [3]:
eligible=pd.read_csv('AuctionData/census_blocks.csv')
eligible.rename(columns = {'state':'state_abbreviation', 'census_id':'cbg_id', 'county':'county_name'}, inplace = True)
eligible['partially_eligible']=1
eligible=eligible[['state_abbreviation', 'county_name', 'block_id', 'partially_eligible','cbg_id']]

eligible_cbg=pd.read_csv('AuctionData/all_items.csv')  
bids=pd.read_csv('AuctionData/all_bid_results.csv')  

eligible.columns
eligible_cbg.columns
bids.columns

  exec(code_obj, self.user_global_ns, self.user_ns)


Index(['state_abbreviation', 'county_name', 'block_id', 'partially_eligible',
       'cbg_id'],
      dtype='object')

Index(['auction_id', 'item', 'state', 'county', 'census_id', 'reserve_price',
       'locations'],
      dtype='object')

Index(['auction_id', 'round', 'bidder', 'frn', 'item', 't+l_weight', 'tier',
       'latency', 'price_point_bid', 'bid_clock_pct_flag', 'min_scale_pct',
       'implied_support_bid', 'bid_id', 'my_assigned_status',
       'not_assigned_reason', 'assigned_support', 'assigned_support_type',
       'price_point_assigned', 'assigned_id', 'assigned_id_combined',
       'biddable_next_round', 'package_biddable_next_round', 'state', 'county',
       'census_id', 'locations', 'selection_number'],
      dtype='object')

# Merge datasets

In [4]:
eligible_cbg['tract_num'] = eligible_cbg['census_id'].astype(str).str[0:-1].astype(np.int64)     
bids['tract_num'] = bids['census_id'].astype(str).str[0:-1].astype(np.int64)

In [5]:
def county_tostr(county):
    if county<100000000000:
        return str(county)
    else:
        return str(county) 

In [18]:
awarded=bids.loc[bids['assigned_support']>0,['item','assigned_support','t+l_weight']]
awarded_item =awarded.groupby(['item']).mean()
awarded_item =awarded_item.reset_index()
merged=eligible_cbg.merge(awarded_item, how='left', left_on=['item'], right_on=['item'])
merged.head()

Unnamed: 0,auction_id,item,state,county,census_id,reserve_price,locations,tract_num,assigned_support,t+l_weight
0,904,AL-001-0207001,AL,Autauga,10010207001,6172.0,24,1001020700,2468.8,20.0
1,904,AL-001-0208011,AL,Autauga,10010208011,16548.0,50,1001020801,6784.68,20.0
2,904,AL-001-0208012,AL,Autauga,10010208012,3350.0,21,1001020801,1373.5,20.0
3,904,AL-001-0208021,AL,Autauga,10010208021,41396.0,395,1001020802,24837.6,0.0
4,904,AL-001-0209001,AL,Autauga,10010209001,92636.0,504,1001020900,55581.6,0.0


In [19]:
#Calculate FCC's expenditure
merged['assigned_support2']=np.nan
merged.loc[merged['assigned_support']>0,'assigned_support2']=merged.loc[merged['assigned_support']>0,'assigned_support']+merged.loc[merged['assigned_support']>0,'t+l_weight']*merged.loc[merged['assigned_support']>0,'reserve_price']/100
merged.loc[merged['assigned_support2'].isna(),'assigned_support2']=merged.loc[merged['assigned_support2'].isna(),'reserve_price']
merged.head()

Unnamed: 0,auction_id,item,state,county,census_id,reserve_price,locations,tract_num,assigned_support,t+l_weight,assigned_support2
0,904,AL-001-0207001,AL,Autauga,10010207001,6172.0,24,1001020700,2468.8,20.0,3703.2
1,904,AL-001-0208011,AL,Autauga,10010208011,16548.0,50,1001020801,6784.68,20.0,10094.28
2,904,AL-001-0208012,AL,Autauga,10010208012,3350.0,21,1001020801,1373.5,20.0,2043.5
3,904,AL-001-0208021,AL,Autauga,10010208021,41396.0,395,1001020802,24837.6,0.0,24837.6
4,904,AL-001-0209001,AL,Autauga,10010209001,92636.0,504,1001020900,55581.6,0.0,55581.6


# Add Census Data

In [None]:
# Census tract data
import geopandas as gpd
gdb = 'Census Tracts/a0000000b.gdbtable'
censustracts_dem = gpd.read_file(gdb)
censustracts_dem.columns
#census tract, poppulation, black, hispanic, not hispanic, housing units, occupied housing units
demographics=censustracts_dem[['GEOID10','DP0080001','DP0080004','DP0100001','DP0100002','DP0100007','DP0180001','DP0180002','Shape_Length', 'Shape_Area']]
demographics['GEOID10']=pd.to_numeric(demographics['GEOID10'], errors='coerce')

In [20]:
merged['item']=pd.to_numeric(merged['census_id'], errors='coerce')
merged.head()

Unnamed: 0,auction_id,item,state,county,census_id,reserve_price,locations,tract_num,assigned_support,t+l_weight,assigned_support2
0,904,10010207001,AL,Autauga,10010207001,6172.0,24,1001020700,2468.8,20.0,3703.2
1,904,10010208011,AL,Autauga,10010208011,16548.0,50,1001020801,6784.68,20.0,10094.28
2,904,10010208012,AL,Autauga,10010208012,3350.0,21,1001020801,1373.5,20.0,2043.5
3,904,10010208021,AL,Autauga,10010208021,41396.0,395,1001020802,24837.6,0.0,24837.6
4,904,10010209001,AL,Autauga,10010209001,92636.0,504,1001020900,55581.6,0.0,55581.6


In [24]:
# Census block group data - ACS
geo_cbg=pd.read_csv('Data/safegraph_open_census_data_2019/metadata/cbg_geographic_data.csv')  
cbg_b01=pd.read_csv('Data/safegraph_open_census_data_2019/data/cbg_b01.csv') 
cbg_b02=pd.read_csv('Data/safegraph_open_census_data_2019/data/cbg_b02.csv')  
cbg_b19=pd.read_csv('Data/safegraph_open_census_data_2019/data/cbg_b19.csv')  
cbg_b28=pd.read_csv('Data/safegraph_open_census_data_2019/data/cbg_b28.csv')  

geo_cbg=geo_cbg[['census_block_group','amount_land']]
cbg_b01=cbg_b01[['census_block_group','B01001e1']] #Total Pop
cbg_b02=cbg_b02[['census_block_group','B02001e2','B02001e3']] #White, black
cbg_b19=cbg_b19[['census_block_group','B19001e1','B19013e1','B19301e1']] #Household income, median income, per capita income
cbg_b28=cbg_b28[['census_block_group','B28002e1','B28002e2','B28002e4','B28002e7','B28002e9','B28002e13']] #Internet Access

cbg_cov=geo_cbg.merge(cbg_b01, how='outer', left_on=['census_block_group'], right_on=['census_block_group'])
cbg_cov=cbg_cov.merge(cbg_b02, how='outer', left_on=['census_block_group'], right_on=['census_block_group'])
cbg_cov=cbg_cov.merge(cbg_b19, how='outer', left_on=['census_block_group'], right_on=['census_block_group'])
cbg_cov=cbg_cov.merge(cbg_b28, how='outer', left_on=['census_block_group'], right_on=['census_block_group'])

merged2=merged.merge(cbg_cov, how='left', left_on=['item'], right_on=['census_block_group'])
merged2['density']=merged2['B01001e1']/(merged2['amount_land']+0.000000001)
merged2['Internet']=merged2['B28002e2']/merged2['B28002e1']
merged2['Broadband']=merged2['B28002e4']/merged2['B28002e1']
merged2['Cable_Fiber']=merged2['B28002e7']/merged2['B28002e1']
merged2['Satellite']=merged2['B28002e9']/merged2['B28002e1']
merged2['NoAccess']=merged2['B28002e13']/merged2['B28002e1']
merged2['Med_income']=merged2['B19013e1']
merged2['PerCapita_income']=merged2['B19301e1']

merged2.describe()

Unnamed: 0,auction_id,item,census_id,reserve_price,locations,tract_num,assigned_support,t+l_weight,assigned_support2,census_block_group,...,B28002e9,B28002e13,density,Internet,Broadband,Cable_Fiber,Satellite,NoAccess,Med_income,PerCapita_income
count,61766.0,61766.0,61766.0,61766.0,61766.0,61766.0,57172.0,57172.0,61766.0,61741.0,...,61741.0,61741.0,61741.0,61411.0,61411.0,61411.0,61411.0,61411.0,59893.0,61510.0
mean,904.0,281185100000.0,281185100000.0,42939.37,85.739258,28118510000.0,16145.471061,7.500525,16171.649918,281117500000.0,...,49.138773,91.725498,0.000557,0.788568,0.783195,0.591918,0.090497,0.179601,65355.836408,32423.271728
std,0.0,157710700000.0,157710700000.0,99721.48,155.722567,15771070000.0,40501.891158,9.821904,41319.355684,157627400000.0,...,53.406888,72.295276,0.001339,0.139461,0.140467,0.208754,0.083528,0.129918,33098.070712,16568.744157
min,904.0,10010210000.0,10010210000.0,1.0,1.0,1001021000.0,0.04,0.0,0.04,10010210000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2499.0,56.0
25%,904.0,170190100000.0,170190100000.0,579.0,3.0,17019010000.0,322.2,0.0,325.8,170190100000.0,...,15.0,40.0,1.8e-05,0.709775,0.702988,0.445813,0.031674,0.081464,43833.0,22440.25
50%,904.0,261635800000.0,261635800000.0,3592.5,16.0,26163580000.0,1739.4,0.0,1691.4,261635700000.0,...,35.0,77.0,8.4e-05,0.807415,0.800546,0.606357,0.068884,0.15956,58182.0,28906.0
75%,904.0,420333300000.0,420333300000.0,32739.75,92.0,42033330000.0,11788.95,20.0,11236.825,420333300000.0,...,67.0,126.0,0.00054,0.892966,0.888374,0.755474,0.125604,0.251005,78224.0,37838.0
max,904.0,691209500000.0,691209500000.0,2171267.0,2168.0,69120950000.0,985532.1,60.0,985532.1,560430000000.0,...,2841.0,1043.0,0.047777,1.0,1.0,1.0,1.0,1.0,250001.0,254873.0


# Area and urban

In [25]:
States=['01','02','04','05','06','08','09','10','11','12','13','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','44','45','46','47','48','49','50','51','53','54','55','56','60','66','69','72','78']

count=0
for i in States:
    count=count+1
    path='GIS/tl_2019_'+i+'_tabblock10.dbf'
    if count==1:
        table=DBF(path)
        tiger = pd.DataFrame(data=table)
        #tiger['ALAND10'] = tiger['ALAND10']+tiger['AWATER10']   
        tiger = tiger[['GEOID10','UR10','ALAND10','INTPTLAT10', 'INTPTLON10']] 
        tiger['GEOID10']=pd.to_numeric(tiger['GEOID10'], errors='coerce')
        tiger = eligible.merge(tiger, how='inner', left_on='block_id', right_on='GEOID10')
        tiger = tiger[['GEOID10','UR10','ALAND10','INTPTLAT10', 'INTPTLON10','cbg_id','state_abbreviation', 'county_name']]
    
    else:
        table=DBF(path)
        aux = pd.DataFrame(data=table)
        aux = aux[['GEOID10','UR10','ALAND10','INTPTLAT10', 'INTPTLON10']]
        aux['GEOID10']=pd.to_numeric(aux['GEOID10'], errors='coerce')
        aux = eligible.merge(aux, how='inner', left_on='block_id', right_on='GEOID10')
        aux = aux[['GEOID10','UR10','ALAND10','INTPTLAT10', 'INTPTLON10','cbg_id','state_abbreviation', 'county_name']]
        tiger=tiger.append(aux)                      

tiger['constant']=1
tiger.loc[tiger['UR10']=='U', 'UR10'] = 1
tiger.loc[tiger['UR10']=='R', 'UR10'] = 0
tiger['UR10']=pd.to_numeric(tiger['UR10'], errors='coerce')
tiger['INTPTLAT10']=pd.to_numeric(tiger['INTPTLAT10'], errors='coerce')
tiger['INTPTLON10']=pd.to_numeric(tiger['INTPTLON10'], errors='coerce')
tiger2=tiger.groupby('cbg_id').sum()
tiger2=tiger2.reset_index()
tiger2['UR10']=tiger2['UR10']/tiger2['constant']
tiger2['INTPTLAT10']=tiger2['INTPTLAT10']/tiger2['constant']
tiger2['INTPTLON10']=tiger2['INTPTLON10']/tiger2['constant']
tiger2['ALAND10']=tiger2['ALAND10']/1000000

In [26]:
tiger2['census_id']=tiger2['cbg_id']
tiger2=tiger2[['census_id','UR10','ALAND10','INTPTLAT10', 'INTPTLON10']]

In [27]:
merged2 = merged2.merge(tiger2, how='left', left_on='census_id', right_on='census_id')

In [28]:
merged2.columns
merged2.head()

Index(['auction_id', 'item', 'state', 'county', 'census_id', 'reserve_price',
       'locations', 'tract_num', 'assigned_support', 't+l_weight',
       'assigned_support2', 'census_block_group', 'amount_land', 'B01001e1',
       'B02001e2', 'B02001e3', 'B19001e1', 'B19013e1', 'B19301e1', 'B28002e1',
       'B28002e2', 'B28002e4', 'B28002e7', 'B28002e9', 'B28002e13', 'density',
       'Internet', 'Broadband', 'Cable_Fiber', 'Satellite', 'NoAccess',
       'Med_income', 'PerCapita_income', 'UR10', 'ALAND10', 'INTPTLAT10',
       'INTPTLON10'],
      dtype='object')

Unnamed: 0,auction_id,item,state,county,census_id,reserve_price,locations,tract_num,assigned_support,t+l_weight,...,Broadband,Cable_Fiber,Satellite,NoAccess,Med_income,PerCapita_income,UR10,ALAND10,INTPTLAT10,INTPTLON10
0,904,10010207001,AL,Autauga,10010207001,6172.0,24,1001020700,2468.8,20.0,...,0.731567,0.540323,0.041475,0.258065,38636.0,21519.0,0.0,9.493489,32.418811,-86.423272
1,904,10010208011,AL,Autauga,10010208011,16548.0,50,1001020801,6784.68,20.0,...,0.795302,0.61745,0.16443,0.181208,62000.0,27893.0,0.0,12.654161,32.406738,-86.545288
2,904,10010208012,AL,Autauga,10010208012,3350.0,21,1001020801,1373.5,20.0,...,0.960591,0.786946,0.078818,0.018473,87813.0,37536.0,0.0,2.692645,32.449986,-86.547772
3,904,10010208021,AL,Autauga,10010208021,41396.0,395,1001020802,24837.6,0.0,...,0.66164,0.257304,0.134779,0.33836,48942.0,20703.0,0.0,40.692545,32.524626,-86.561237
4,904,10010209001,AL,Autauga,10010209001,92636.0,504,1001020900,55581.6,0.0,...,0.641026,0.242735,0.2,0.358974,39263.0,25836.0,0.0,94.204621,32.646931,-86.58359


# Fixed Effect by Census Tract

In [48]:
dataset=merged2[['census_id','tract_num','assigned_support2','locations','reserve_price','B01001e1','B02001e3','Cable_Fiber','PerCapita_income','amount_land','ALAND10','UR10']]
dataset['expenditure_perloc']=np.log(dataset['assigned_support2']/dataset['locations'])
dataset['reserve_price']=np.log(dataset['reserve_price'])
dataset['locations']=np.log(dataset['locations'])
dataset['area']=np.log(dataset['ALAND10'])
dataset['area_cbg']=np.log(dataset['amount_land']+0.00001)
dataset['cable_cbg']=np.log(dataset['Cable_Fiber']+1)
dataset['inc_percapita']=np.log(dataset['PerCapita_income']+1)
dataset['people_cbg']=np.log(dataset['B01001e1']+1)
dataset['people_black_cbg']=np.log(dataset['B02001e3']+1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['expenditure_perloc']=np.log(dataset['assigned_support2']/dataset['locations'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['reserve_price']=np.log(dataset['reserve_price'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['locations']=np.log(dataset['locations'])
A value i

In [49]:
datasetfinal=dataset[['census_id','tract_num','reserve_price','expenditure_perloc','locations','people_cbg','people_black_cbg','area','area_cbg','inc_percapita','cable_cbg','UR10']]
mean_tract=datasetfinal.groupby('tract_num').mean()
mean_tract=mean_tract.reset_index()
datasetfinal = datasetfinal.merge(mean_tract, how='left', left_on='tract_num', right_on='tract_num')
datasetfinal.columns
datasetfinal.head()

Index(['census_id_x', 'tract_num', 'reserve_price_x', 'expenditure_perloc_x',
       'locations_x', 'people_cbg_x', 'people_black_cbg_x', 'area_x',
       'area_cbg_x', 'inc_percapita_x', 'cable_cbg_x', 'UR10_x', 'census_id_y',
       'reserve_price_y', 'expenditure_perloc_y', 'locations_y',
       'people_cbg_y', 'people_black_cbg_y', 'area_y', 'area_cbg_y',
       'inc_percapita_y', 'cable_cbg_y', 'UR10_y'],
      dtype='object')

Unnamed: 0,census_id_x,tract_num,reserve_price_x,expenditure_perloc_x,locations_x,people_cbg_x,people_black_cbg_x,area_x,area_cbg_x,inc_percapita_x,...,reserve_price_y,expenditure_perloc_y,locations_y,people_cbg_y,people_black_cbg_y,area_y,area_cbg_y,inc_percapita_y,cable_cbg_y,UR10_y
0,10010207001,1001020700,8.727778,5.038899,3.178054,7.644441,6.244167,2.250606,16.790826,9.976738,...,8.727778,5.038899,3.178054,7.644441,6.244167,2.250606,16.790826,9.976738,0.431992,0.0
1,10010208011,1001020801,9.714021,5.307701,3.912023,6.758095,5.480639,2.537986,18.209333,10.236167,...,8.915368,4.942799,3.478273,7.257359,5.384453,1.764255,17.896768,10.384625,0.530679,0.0
2,10010208012,1001020801,8.116716,4.577897,3.044522,7.756623,5.288267,0.990524,17.584203,10.533082,...,8.915368,4.942799,3.478273,7.257359,5.384453,1.764255,17.896768,10.384625,0.530679,0.0
3,10010208021,1001020802,10.63094,4.141228,5.978886,8.058011,6.190315,3.706045,18.437322,9.938082,...,10.63094,4.141228,5.978886,8.058011,6.190315,3.706045,18.437322,9.938082,0.22897,0.0
4,10010209001,1001020900,11.436433,4.703031,6.222576,7.167809,5.313206,4.545469,18.442201,10.159563,...,10.845648,4.787608,5.549977,7.304129,4.644681,3.538167,18.065161,10.355394,0.345789,0.0


In [50]:
datasetfinal=datasetfinal.loc[datasetfinal['census_id_x']!=datasetfinal['census_id_y'],:]
datasetfinal=datasetfinal.dropna()
datasetfinal.describe()

Unnamed: 0,census_id_x,tract_num,reserve_price_x,expenditure_perloc_x,locations_x,people_cbg_x,people_black_cbg_x,area_x,area_cbg_x,inc_percapita_x,...,reserve_price_y,expenditure_perloc_y,locations_y,people_cbg_y,people_black_cbg_y,area_y,area_cbg_y,inc_percapita_y,cable_cbg_y,UR10_y
count,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,...,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0,42707.0
mean,285074800000.0,28507480000.0,8.646647,4.682918,3.141297,7.143939,2.665947,-0.097139,16.769664,10.27168,...,8.640363,4.682459,3.135904,7.140882,2.667035,-0.104057,16.762978,10.270729,0.442386,0.328435
std,156972400000.0,15697240000.0,2.564387,1.221744,1.953063,0.553187,2.463829,3.581694,1.951355,0.429425,...,2.211669,0.970603,1.665107,0.431189,2.12132,3.171379,1.776807,0.37498,0.11958,0.40993
min,10010210000.0,1001021000.0,0.0,-3.218876,0.0,3.178054,0.0,-12.716898,10.59596,6.971669,...,1.319529,-1.226588,0.0,3.04225,0.0,-10.617046,10.855269,7.33671,0.011434,0.0
25%,170409600000.0,17040960000.0,6.740519,3.988984,1.609438,6.782192,0.0,-3.047131,15.339329,10.019135,...,6.906579,4.128892,1.76318,6.862557,0.89588,-2.709416,15.493661,10.046146,0.361489,0.0
50%,270550200000.0,27055020000.0,8.700015,4.804489,3.135494,7.129298,2.639057,0.38223,17.034812,10.259412,...,8.660379,4.76395,3.089265,7.113837,2.138333,0.233709,17.080457,10.24707,0.450575,0.03125
75%,420430300000.0,42043030000.0,10.774906,5.522888,4.836282,7.489971,4.867534,2.993497,18.202056,10.514475,...,10.454089,5.336048,4.482258,7.39786,4.485534,2.603825,18.043301,10.473589,0.53222,0.733333
max,560430000000.0,56043000000.0,14.419305,7.889834,7.68156,10.980297,9.036344,8.295645,23.690203,12.290915,...,14.096027,7.524088,7.432212,10.091779,9.004467,7.591956,23.015646,12.142706,0.693147,1.0


In [57]:
aux=datasetfinal.copy()
y=aux['expenditure_perloc_x']-aux['expenditure_perloc_y']
x=aux[['locations_x', 'people_cbg_x','area_x', 'area_cbg_x', 'inc_percapita_x','cable_cbg_x','UR10_x']]
x['locations_x']=aux['locations_x']-aux['locations_y']
x['people_cbg_x']=aux['people_cbg_x']-aux['people_cbg_y']
x['area_x']=aux['area_x']-aux['area_y']
x['area_cbg_x']=aux['area_cbg_x']-aux['area_cbg_y']
x['inc_percapita_x']=aux['inc_percapita_x']-aux['inc_percapita_y']
x['cable_cbg_x']=aux['cable_cbg_x']-aux['cable_cbg_y']
x['UR10_x']=aux['UR10_x']-aux['UR10_y']


model = sm.OLS(y, x)
#results = model.fit(cov_type='cluster', cov_kwds={'groups': aux['county']})
results = model.fit()
print(results.summary())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['locations_x']=aux['locations_x']-aux['locations_y']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['people_cbg_x']=aux['people_cbg_x']-aux['people_cbg_y']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['area_x']=aux['area_x']-aux['area_y']
A value is trying to be set on a copy of a slice fro

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.232
Model:                            OLS   Adj. R-squared (uncentered):              0.232
Method:                 Least Squares   F-statistic:                              1840.
Date:                Sun, 14 Aug 2022   Prob (F-statistic):                        0.00
Time:                        22:34:19   Log-Likelihood:                         -42334.
No. Observations:               42707   AIC:                                  8.468e+04
Df Residuals:                   42700   BIC:                                  8.474e+04
Df Model:                           7                                                  
Covariance Type:            nonrobust                                                  
                      coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------

In [58]:
aux=datasetfinal.copy()
y=aux['reserve_price_x']-aux['reserve_price_y']
x=aux[['locations_x', 'people_cbg_x', 'area_x', 'area_cbg_x', 'inc_percapita_x','cable_cbg_x','UR10_x']]
x['locations_x']=aux['locations_x']-aux['locations_y']
x['people_cbg_x']=aux['people_cbg_x']-aux['people_cbg_y']
x['area_x']=aux['area_x']-aux['area_y']
x['area_cbg_x']=aux['area_cbg_x']-aux['area_cbg_y']
x['inc_percapita_x']=aux['inc_percapita_x']-aux['inc_percapita_y']
x['cable_cbg_x']=aux['cable_cbg_x']-aux['cable_cbg_y']
x['UR10_x']=aux['UR10_x']-aux['UR10_y']


model = sm.OLS(y, x)
#results = model.fit(cov_type='cluster', cov_kwds={'groups': aux['county']})
results = model.fit()
print(results.summary())

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.780
Model:                            OLS   Adj. R-squared (uncentered):              0.780
Method:                 Least Squares   F-statistic:                          2.160e+04
Date:                Sun, 14 Aug 2022   Prob (F-statistic):                        0.00
Time:                        22:34:19   Log-Likelihood:                         -39409.
No. Observations:               42707   AIC:                                  7.883e+04
Df Residuals:                   42700   BIC:                                  7.889e+04
Df Model:                           7                                                  
Covariance Type:            nonrobust                                                  
                      coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['locations_x']=aux['locations_x']-aux['locations_y']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['people_cbg_x']=aux['people_cbg_x']-aux['people_cbg_y']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['area_x']=aux['area_x']-aux['area_y']
A value is trying to be set on a copy of a slice fro

In [59]:
aux=datasetfinal.copy()
y=aux['expenditure_perloc_x']-aux['expenditure_perloc_y']
x=aux[['locations_x', 'people_cbg_x','area_x', 'area_cbg_x', 'inc_percapita_x','cable_cbg_x','UR10_x']]
x['reserve_price']=results.resid
x['locations_x']=aux['locations_x']-aux['locations_y']
x['people_cbg_x']=aux['people_cbg_x']-aux['people_cbg_y']
x['area_x']=aux['area_x']-aux['area_y']
x['area_cbg_x']=aux['area_cbg_x']-aux['area_cbg_y']
x['inc_percapita_x']=aux['inc_percapita_x']-aux['inc_percapita_y']
x['cable_cbg_x']=aux['cable_cbg_x']-aux['cable_cbg_y']
x['UR10_x']=aux['UR10_x']-aux['UR10_y']


model = sm.OLS(y, x)
#results = model.fit(cov_type='cluster', cov_kwds={'groups': aux['county']})
results = model.fit()
print(results.summary())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['reserve_price']=results.resid
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['locations_x']=aux['locations_x']-aux['locations_y']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x['people_cbg_x']=aux['people_cbg_x']-aux['people_cbg_y']
A value is trying to be set on a copy of a slice from a Dat

                                 OLS Regression Results                                
Dep. Variable:                      y   R-squared (uncentered):                   0.769
Model:                            OLS   Adj. R-squared (uncentered):              0.769
Method:                 Least Squares   F-statistic:                          1.781e+04
Date:                Sun, 14 Aug 2022   Prob (F-statistic):                        0.00
Time:                        22:34:19   Log-Likelihood:                         -16637.
No. Observations:               42707   AIC:                                  3.329e+04
Df Residuals:                   42699   BIC:                                  3.336e+04
Df Model:                           8                                                  
Covariance Type:            nonrobust                                                  
                      coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------