In [3]:
import pandas as pd

# show a lot of data
pd.set_option('display.max_rows', 128)

# read in the pr gov data using first row as the header and the space-like characters (regex) as delimiters
pr_data = pd.read_csv("pr.data.0.Current", header=[0], delimiter=r"\s+")

# drop the unused footnote column
pr_data.drop(columns=["footnote_codes"], inplace=True)
pr_data

Unnamed: 0,series_id,year,period,value
0,PRS30006011,1995,Q01,2.600
1,PRS30006011,1995,Q02,2.100
2,PRS30006011,1995,Q03,0.900
3,PRS30006011,1995,Q04,0.100
4,PRS30006011,1995,Q05,1.400
...,...,...,...,...
32404,PRS88003203,2020,Q03,110.307
32405,PRS88003203,2020,Q04,112.010
32406,PRS88003203,2020,Q05,112.635
32407,PRS88003203,2021,Q01,112.442


In [75]:
# For every series_id, we find the best year, the year with the max/largest sum of "value" for all quarters in that year. 
pr_summed_by_year = pr_data.groupby(['series_id', 'year']).sum().reset_index()
pr_summed_by_year

Unnamed: 0,series_id,year,value
0,PRS30006011,1995,7.100
1,PRS30006011,1996,-0.500
2,PRS30006011,1997,4.400
3,PRS30006011,1998,4.200
4,PRS30006011,1999,-7.700
...,...,...,...
7519,PRS88003203,2017,543.784
7520,PRS88003203,2018,551.651
7521,PRS88003203,2019,564.407
7522,PRS88003203,2020,563.117


In [76]:
pr_summed_by_year[pr_summed_by_year['series_id'] == 'PRS30006012']

Unnamed: 0,series_id,year,value
27,PRS30006012,1995,1.8
28,PRS30006012,1996,0.6
29,PRS30006012,1997,6.8
30,PRS30006012,1998,-0.1
31,PRS30006012,1999,-8.0
32,PRS30006012,2000,-1.6
33,PRS30006012,2001,-35.4
34,PRS30006012,2002,-28.0
35,PRS30006012,2003,-22.3
36,PRS30006012,2004,-1.1


In [72]:
best_year_idx = pr_summed_by_year.drop(columns='year').groupby(['series_id']).idxmax().reset_index(drop=True)
pr_best_years = pr_summed_by_year.loc[best_year_idx.value]
pr_best_years

Unnamed: 0,series_id,year,value
17,PRS30006011,2012,9.500
46,PRS30006012,2014,8.800
57,PRS30006013,1998,733.290
96,PRS30006021,2010,14.200
123,PRS30006022,2010,11.200
...,...,...,...
7396,PRS88003192,2002,285.200
7435,PRS88003193,2014,520.840
7455,PRS88003201,2007,21.700
7482,PRS88003202,2007,23.900


In [102]:
import json
with open("datausa.api.json") as f:
	usa_json=json.load(f)
	
usa_data=pd.DataFrame(usa_json['data'])

pop_data=usa_data[["Year","Population"]].rename(columns={"Year": "year"})
pop_data

Unnamed: 0,year,Population
0,2019,328239523
1,2018,327167439
2,2017,325719178
3,2016,323127515
4,2015,321418821
5,2014,318857056
6,2013,316128839


In [106]:
record = pr_data[(pr_data["series_id"] == "PRS30006032") & (pr_data["period"] == "Q01")]
record.join(pop_data,on="year", how="inner", lsuffix='y', rsuffix='r')

Unnamed: 0,series_id,yeary,period,value,yearr,Population
924,PRS30006032,1995,Q01,0.1,,
929,PRS30006032,1996,Q01,-4.4,,
934,PRS30006032,1997,Q01,2.6,,
939,PRS30006032,1998,Q01,1.0,,
944,PRS30006032,1999,Q01,-4.2,,
949,PRS30006032,2000,Q01,0.1,,
954,PRS30006032,2001,Q01,-6.0,,
959,PRS30006032,2002,Q01,-7.0,,
964,PRS30006032,2003,Q01,-5.8,,
969,PRS30006032,2004,Q01,2.3,,
