# Prior to Running This File:

Ensure you have tabula-py installed on your machine. Uncomment and run the first cell if you do not have it installed.

In [230]:
#!pip install tabula-py

In [231]:
import tabula
import pandas as pd
import re
import math
import sys
from gdpPDFCleaning import *

# GDP 95-00 Data

In [232]:
# create instance of yearly GDP data
GDP95_01 = YearlyGDP('GDP1995-2001.pdf', 8, 1995, 2000, '2001')

In [233]:
# testing that it was initialized with an empty DataFrame
GDP95_01.df

In [234]:
# initializeDataFrame() method reads in the PDF, assigns the first DataFrame in the 
# list as the df (only one table per page), and renames the columns. Lastly, it returns 
# the DataFrame for viewing
GDP95_01.initializeDataFrame()

Unnamed: 0,Area,1995,1996,1997,1998,1999,2000,2001,NaN
0,,,,,,,,,2000 2001
1,United States.......................,7309516.0,7715901.0,8224960.0,8750174.0,9251541.0,9891187.0,10137190.0,100 100
2,New England....................,416166.0,439596.0,471336.0,503940.0,533324.0,582874.0,594686.0,5.9 5.9
3,Connecticut......................,118645.0,124157.0,134968.0,142701.0,149010.0,161929.0,166165.0,1.6 1.6
4,Maine...............................,27987.0,28925.0,30409.0,32208.0,34102.0,36276.0,37449.0,0.4 0.4
5,Massachusetts..................,197469.0,210127.0,223571.0,241369.0,257802.0,283072.0,287802.0,2.9 2.8
6,New Hampshire...............,32388.0,35068.0,37470.0,40529.0,43360.0,47385.0,47183.0,0.5 0.5
7,Rhode Island....................,25703.0,26656.0,29409.0,30838.0,31895.0,36086.0,36939.0,0.4 0.4
8,Vermont...........................,13974.0,14662.0,15510.0,16294.0,17155.0,18124.0,19149.0,0.2 0.2
9,Mideast..............................,1403270.0,1471796.0,1547124.0,1649536.0,1720155.0,1837583.0,1900223.0,18.6 18.7


In [235]:
GDP95_01.cleanStates('Area')

Unnamed: 0,Area,1995,1996,1997,1998,1999,2000,2001,NaN,Cleaned Area
0,,,,,,,,,2000 2001,
1,United States.......................,7309516.0,7715901.0,8224960.0,8750174.0,9251541.0,9891187.0,10137190.0,100 100,United States
2,New England....................,416166.0,439596.0,471336.0,503940.0,533324.0,582874.0,594686.0,5.9 5.9,New England
3,Connecticut......................,118645.0,124157.0,134968.0,142701.0,149010.0,161929.0,166165.0,1.6 1.6,Connecticut
4,Maine...............................,27987.0,28925.0,30409.0,32208.0,34102.0,36276.0,37449.0,0.4 0.4,Maine
5,Massachusetts..................,197469.0,210127.0,223571.0,241369.0,257802.0,283072.0,287802.0,2.9 2.8,Massachusetts
6,New Hampshire...............,32388.0,35068.0,37470.0,40529.0,43360.0,47385.0,47183.0,0.5 0.5,New Hampshire
7,Rhode Island....................,25703.0,26656.0,29409.0,30838.0,31895.0,36086.0,36939.0,0.4 0.4,Rhode Island
8,Vermont...........................,13974.0,14662.0,15510.0,16294.0,17155.0,18124.0,19149.0,0.2 0.2,Vermont
9,Mideast..............................,1403270.0,1471796.0,1547124.0,1649536.0,1720155.0,1837583.0,1900223.0,18.6 18.7,Mideast


In [236]:
GDP95_01.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,1995,1996,1997,1998,1999,2000,2001,NaN,Cleaned Area,Geo Loc
0,,,,,,,,,2000 2001,,
1,United States.......................,7309516.0,7715901.0,8224960.0,8750174.0,9251541.0,9891187.0,10137190.0,100 100,United States,
2,New England....................,416166.0,439596.0,471336.0,503940.0,533324.0,582874.0,594686.0,5.9 5.9,New England,
3,Connecticut......................,118645.0,124157.0,134968.0,142701.0,149010.0,161929.0,166165.0,1.6 1.6,Connecticut,New England
4,Maine...............................,27987.0,28925.0,30409.0,32208.0,34102.0,36276.0,37449.0,0.4 0.4,Maine,New England
5,Massachusetts..................,197469.0,210127.0,223571.0,241369.0,257802.0,283072.0,287802.0,2.9 2.8,Massachusetts,New England
6,New Hampshire...............,32388.0,35068.0,37470.0,40529.0,43360.0,47385.0,47183.0,0.5 0.5,New Hampshire,New England
7,Rhode Island....................,25703.0,26656.0,29409.0,30838.0,31895.0,36086.0,36939.0,0.4 0.4,Rhode Island,New England
8,Vermont...........................,13974.0,14662.0,15510.0,16294.0,17155.0,18124.0,19149.0,0.2 0.2,Vermont,New England
9,Mideast..............................,1403270.0,1471796.0,1547124.0,1649536.0,1720155.0,1837583.0,1900223.0,18.6 18.7,Mideast,


In [237]:
GDP95_01.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,1995,Alaska,2001,24791.0,Far West
1,1995,Alabama,2001,95514.0,Southeast
2,1995,Arkansas,2001,53809.0,Southeast
3,1995,Arizona,2001,104586.0,Southwest
4,1995,California,2001,925931.0,Far West
5,1995,Colorado,2001,109021.0,Rocky Mountain
6,1995,Connecticut,2001,118645.0,New England
7,1995,District of Columbia,2001,48408.0,Mideast
8,1995,Delaware,2001,27575.0,Mideast
9,1995,Florida,2001,344771.0,Southeast


In [238]:
GDP95_01.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,1995,Alaska,2001,24791.0,Far West
1,1995,Alabama,2001,95514.0,Southeast
2,1995,Arkansas,2001,53809.0,Southeast
3,1995,Arizona,2001,104586.0,Southwest
4,1995,California,2001,925931.0,Far West
5,1995,Colorado,2001,109021.0,Rocky Mountain
6,1995,Connecticut,2001,118645.0,New England
7,1995,District of Columbia,2001,48408.0,Mideast
8,1995,Delaware,2001,27575.0,Mideast
9,1995,Florida,2001,344771.0,Southeast


# GDP 01 - 03 Data 

In [239]:
# create instance of yearly GDP data
GDP01_03 = YearlyGDP('GDP2001-2003.pdf', 5, 2001, 2003, '2004')

In [240]:
# testing that it was initialized with an empty DataFrame
GDP01_03.df

In [241]:
# initializeDataFrame() method reads in the PDF, assigns the first DataFrame in the 
# list as the df (only one table per page), and renames the columns. Lastly, it returns 
# the DataFrame for viewing
GDP01_03.initializeDataFrame()

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5


In [242]:
GDP01_03.splitColumns(2, '2002', '2003')

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1,2002,2003
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0,10412244,10923849
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7,596017,620136
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6,167235,174085
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4,39027,40829
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7,287191,297113
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4,46106,48202
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4,37040,39363
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2,19419,20544
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4,1922516,2010011
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5,46991,50486


In [243]:
GDP01_03.splitColumns(5, '2002 %', '2003 %')

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1,2002,2003,2002 %,2003 %
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0,10412244,10923849,100.0,100.0
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7,596017,620136,5.7,5.7
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6,167235,174085,1.6,1.6
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4,39027,40829,0.4,0.4
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7,287191,297113,2.8,2.7
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4,46106,48202,0.4,0.4
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4,37040,39363,0.4,0.4
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2,19419,20544,0.2,0.2
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4,1922516,2010011,18.5,18.4
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5,46991,50486,0.5,0.5


In [244]:
GDP01_03.cleanStates('Area')

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1,2002,2003,2002 %,2003 %,Cleaned Area
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0,10412244,10923849,100.0,100.0,United States
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7,596017,620136,5.7,5.7,New England
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6,167235,174085,1.6,1.6,Connecticut
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4,39027,40829,0.4,0.4,Maine
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7,287191,297113,2.8,2.7,Massachusetts
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4,46106,48202,0.4,0.4,New Hampshire
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4,37040,39363,0.4,0.4,Rhode Island
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2,19419,20544,0.2,0.2,Vermont
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4,1922516,2010011,18.5,18.4,Mideast
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5,46991,50486,0.5,0.5,Delaware


In [245]:
GDP01_03.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1,2002,2003,2002 %,2003 %,Cleaned Area,Geo Loc
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0,10412244,10923849,100.0,100.0,United States,
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7,596017,620136,5.7,5.7,New England,
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6,167235,174085,1.6,1.6,Connecticut,New England
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4,39027,40829,0.4,0.4,Maine,New England
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7,287191,297113,2.8,2.7,Massachusetts,New England
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4,46106,48202,0.4,0.4,New Hampshire,New England
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4,37040,39363,0.4,0.4,Rhode Island,New England
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2,19419,20544,0.2,0.2,Vermont,New England
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4,1922516,2010011,18.5,18.4,Mideast,
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5,46991,50486,0.5,0.5,Delaware,Mideast


In [246]:
GDP01_03.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2001,Alaska,2004,27358.0,Far West
1,2001,Alabama,2004,118263.0,Southeast
2,2001,Arkansas,2004,68574.0,Southeast
3,2001,Arizona,2004,164263.0,Southwest
4,2001,California,2004,1307880.0,Far West
5,2001,Colorado,2004,177526.0,Rocky Mountain
6,2001,Connecticut,2004,165434.0,New England
7,2001,District of Columbia,2004,63223.0,Mideast
8,2001,Delaware,2004,45049.0,Mideast
9,2001,Florida,2004,496861.0,Southeast


In [247]:
GDP01_03.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2001,Alaska,2004,27358.0,Far West
1,2001,Alabama,2004,118263.0,Southeast
2,2001,Arkansas,2004,68574.0,Southeast
3,2001,Arizona,2004,164263.0,Southwest
4,2001,California,2004,1307880.0,Far West
5,2001,Colorado,2004,177526.0,Rocky Mountain
6,2001,Connecticut,2004,165434.0,New England
7,2001,District of Columbia,2004,63223.0,Mideast
8,2001,Delaware,2004,45049.0,Mideast
9,2001,Florida,2004,496861.0,Southeast


#  GDP 04 - 06

In [248]:
# create instance of yearly GDP data
GDP04_06 = YearlyGDP('GDP2004-2006.pdf', 11, 2004, 2006, '2007')

In [249]:
# testing that it was initialized with an empty DataFrame
GDP04_06.df

In [250]:
# initializeDataFrame() method reads in the PDF, assigns the first DataFrame in the 
# list as the df (only one table per page), and renames the columns. Lastly, it returns 
# the DataFrame for viewing
GDP04_06.initializeDataFrame()

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4


In [251]:
GDP04_06.splitColumns(2, '2005', '2006')

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1,2005,2006
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0,12346871,13119938
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4,674562,712051
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6,193281,204964
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4,44364,46340
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6,317626,335313
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4,53468,56073
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3,43078,45733
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2,22745,23628
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4,2245718,2390856
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4,57334,59589


In [252]:
GDP04_06.splitColumns(5, '2005 %', '2006 %')

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1,2005,2006,2005 %,2006 %
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0,12346871,13119938,100.0,100.0
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4,674562,712051,5.5,5.4
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6,193281,204964,1.6,1.6
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4,44364,46340,0.4,0.4
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6,317626,335313,2.6,2.6
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4,53468,56073,0.4,0.4
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3,43078,45733,0.3,0.3
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2,22745,23628,0.2,0.2
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4,2245718,2390856,18.2,18.2
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4,57334,59589,0.5,0.5


In [253]:
GDP04_06.cleanStates('Area')

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1,2005,2006,2005 %,2006 %,Cleaned Area
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0,12346871,13119938,100.0,100.0,United States
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4,674562,712051,5.5,5.4,New England
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6,193281,204964,1.6,1.6,Connecticut
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4,44364,46340,0.4,0.4,Maine
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6,317626,335313,2.6,2.6,Massachusetts
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4,53468,56073,0.4,0.4,New Hampshire
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3,43078,45733,0.3,0.3,Rhode Island
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2,22745,23628,0.2,0.2,Vermont
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4,2245718,2390856,18.2,18.2,Mideast
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4,57334,59589,0.5,0.5,Delaware


In [254]:
GDP04_06.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1,2005,2006,2005 %,2006 %,Cleaned Area,Geo Loc
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0,12346871,13119938,100.0,100.0,United States,
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4,674562,712051,5.5,5.4,New England,
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6,193281,204964,1.6,1.6,Connecticut,New England
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4,44364,46340,0.4,0.4,Maine,New England
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6,317626,335313,2.6,2.6,Massachusetts,New England
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4,53468,56073,0.4,0.4,New Hampshire,New England
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3,43078,45733,0.3,0.3,Rhode Island,New England
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2,22745,23628,0.2,0.2,Vermont,New England
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4,2245718,2390856,18.2,18.2,Mideast,
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4,57334,59589,0.5,0.5,Delaware,Mideast


In [255]:
GDP04_06.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2004,Alaska,2007,35102.0,Far West
1,2004,Alabama,2007,141527.0,Southeast
2,2004,Arkansas,2007,82137.0,Southeast
3,2004,Arizona,2007,193448.0,Southwest
4,2004,California,2007,1519443.0,Far West
5,2004,Colorado,2007,197329.0,Rocky Mountain
6,2004,Connecticut,2007,182112.0,New England
7,2004,District of Columbia,2007,77913.0,Mideast
8,2004,Delaware,2007,52305.0,Mideast
9,2004,Florida,2007,607284.0,Southeast


In [256]:
GDP04_06.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2004,Alaska,2007,35102.0,Far West
1,2004,Alabama,2007,141527.0,Southeast
2,2004,Arkansas,2007,82137.0,Southeast
3,2004,Arizona,2007,193448.0,Southwest
4,2004,California,2007,1519443.0,Far West
5,2004,Colorado,2007,197329.0,Rocky Mountain
6,2004,Connecticut,2007,182112.0,New England
7,2004,District of Columbia,2007,77913.0,Mideast
8,2004,Delaware,2007,52305.0,Mideast
9,2004,Florida,2007,607284.0,Southeast


# GDP 07-09 Data

In [257]:
# create instance of yearly GDP data
GDP07_09 = YearlyGDP('GDP2007-2009.pdf', 9, 2007, 2009, '2010')

In [258]:
# testing that it was initialized with an empty DataFrame
GDP07_09.df

In [259]:
# initializeDataFrame() method reads in the PDF, assigns the first DataFrame in the 
# list as the df (only one table per page), and renames the columns. Lastly, it returns 
# the DataFrame for viewing
GDP07_09.initializeDataFrame()

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4


In [260]:
GDP07_09.splitColumns(2, '2008', '2009')

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1,2008,2009
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0,14270462,14014849
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5,772347,769308
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6,225958,227550
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4,49972,50039
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6,365623,360538
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4,58780,59086
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3,47378,47470
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2,24636,24625
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4,2575532,2557257
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4,58674,60660


In [261]:
GDP07_09.splitColumns(5, '2008 %', '2009 %')

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1,2008,2009,2008 %,2009 %
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0,14270462,14014849,100.0,100.0
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5,772347,769308,5.4,5.5
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6,225958,227550,1.6,1.6
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4,49972,50039,0.4,0.4
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6,365623,360538,2.6,2.6
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4,58780,59086,0.4,0.4
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3,47378,47470,0.3,0.3
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2,24636,24625,0.2,0.2
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4,2575532,2557257,18.0,18.2
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4,58674,60660,0.4,0.4


In [262]:
GDP07_09.cleanStates('Area')

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1,2008,2009,2008 %,2009 %,Cleaned Area
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0,14270462,14014849,100.0,100.0,United States
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5,772347,769308,5.4,5.5,New England
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6,225958,227550,1.6,1.6,Connecticut
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4,49972,50039,0.4,0.4,Maine
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6,365623,360538,2.6,2.6,Massachusetts
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4,58780,59086,0.4,0.4,New Hampshire
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3,47378,47470,0.3,0.3,Rhode Island
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2,24636,24625,0.2,0.2,Vermont
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4,2575532,2557257,18.0,18.2,Mideast
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4,58674,60660,0.4,0.4,Delaware


In [263]:
GDP07_09.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1,2008,2009,2008 %,2009 %,Cleaned Area,Geo Loc
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0,14270462,14014849,100.0,100.0,United States,
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5,772347,769308,5.4,5.5,New England,
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6,225958,227550,1.6,1.6,Connecticut,New England
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4,49972,50039,0.4,0.4,Maine,New England
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6,365623,360538,2.6,2.6,Massachusetts,New England
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4,58780,59086,0.4,0.4,New Hampshire,New England
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3,47378,47470,0.3,0.3,Rhode Island,New England
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2,24636,24625,0.2,0.2,Vermont,New England
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4,2575532,2557257,18.0,18.2,Mideast,
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4,58674,60660,0.4,0.4,Delaware,Mideast


In [264]:
GDP07_09.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2007,Alaska,2010,44587.0,Far West
1,2007,Alabama,2010,165981.0,Southeast
2,2007,Arkansas,2010,97187.0,Southeast
3,2007,Arizona,2010,260122.0,Southwest
4,2007,California,2010,1874783.0,Far West
5,2007,Colorado,2010,242900.0,Rocky Mountain
6,2007,Connecticut,2010,222498.0,New England
7,2007,District of Columbia,2010,92342.0,Mideast
8,2007,Delaware,2010,60108.0,Mideast
9,2007,Florida,2010,759572.0,Southeast


# GDP 10-12 Data

In [265]:
# create instance of yearly GDP data
GDP10_12 = YearlyGDP('GDP2010-2012.pdf', 10, 2010, 2012, '2013')

In [266]:
# testing that it was initialized with an empty DataFrame
GDP10_12.df

In [267]:
# initializeDataFrame() method reads in the PDF, assigns the first DataFrame in the 
# list as the df (only one table per page), and renames the columns. Lastly, it returns 
# the DataFrame for viewing
GDP10_12.initializeDataFrame()

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4


In [268]:
GDP10_12.splitColumns(2, '2011', '2012')

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1,2011,2012
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0,15431583,16141152
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4,842182,874201
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5,235121,242930
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3,51756,53235
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7,413716,431937
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4,64122,66111
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3,49921,51566
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2,27545,28422
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1,2841050,2948377
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4,58612,60650


In [269]:
GDP10_12.splitColumns(5, '2011 %', '2012 %')

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1,2011,2012,2011 %,2012 %
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0,15431583,16141152,100.0,100.0
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4,842182,874201,5.5,5.4
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5,235121,242930,1.5,1.5
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3,51756,53235,0.3,0.3
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7,413716,431937,2.7,2.7
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4,64122,66111,0.4,0.4
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3,49921,51566,0.3,0.3
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2,27545,28422,0.2,0.2
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1,2841050,2948377,18.4,18.3
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4,58612,60650,0.4,0.4


In [270]:
GDP10_12.cleanStates('Area')

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1,2011,2012,2011 %,2012 %,Cleaned Area
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0,15431583,16141152,100.0,100.0,United States
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4,842182,874201,5.5,5.4,New England
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5,235121,242930,1.5,1.5,Connecticut
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3,51756,53235,0.3,0.3,Maine
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7,413716,431937,2.7,2.7,Massachusetts
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4,64122,66111,0.4,0.4,New Hampshire
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3,49921,51566,0.3,0.3,Rhode Island
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2,27545,28422,0.2,0.2,Vermont
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1,2841050,2948377,18.4,18.3,Mideast
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4,58612,60650,0.4,0.4,Delaware


In [271]:
GDP10_12.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1,2011,2012,2011 %,2012 %,Cleaned Area,Geo Loc
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0,15431583,16141152,100.0,100.0,United States,
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4,842182,874201,5.5,5.4,New England,
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5,235121,242930,1.5,1.5,Connecticut,New England
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3,51756,53235,0.3,0.3,Maine,New England
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7,413716,431937,2.7,2.7,Massachusetts,New England
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4,64122,66111,0.4,0.4,New Hampshire,New England
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3,49921,51566,0.3,0.3,Rhode Island,New England
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2,27545,28422,0.2,0.2,Vermont,New England
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1,2841050,2948377,18.4,18.3,Mideast,
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4,58612,60650,0.4,0.4,Delaware,Mideast


In [272]:
GDP10_12.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2010,Alaska,2013,53251.0,Far West
1,2010,Alabama,2013,175734.0,Southeast
2,2010,Arkansas,2013,111355.0,Southeast
3,2010,Arizona,2013,247752.0,Southwest
4,2010,California,2013,1953411.0,Far West
5,2010,Colorado,2013,256628.0,Rocky Mountain
6,2010,Connecticut,2013,233781.0,New England
7,2010,District of Columbia,2013,106615.0,Mideast
8,2010,Delaware,2013,57628.0,Mideast
9,2010,Florida,2013,728604.0,Southeast


# GDP 11-13 Data

In [273]:
# create instance of yearly GDP data
GDP11_13 = YearlyGDP('GDP2011-2013.pdf', 9, 2013, 2013, '2014')

In [274]:
# testing that it was initialized with an empty DataFrame
GDP11_13.df

In [275]:
# initializeDataFrame() method reads in the PDF, assigns the first DataFrame in the 
# list as the df (only one table per page), and renames the columns. Lastly, it returns 
# the DataFrame for viewing
GDP11_13.initializeDataFrame()

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4


In [276]:
GDP11_13.splitColumns(2, '2012', '2013')

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1,2012,2013
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0,16060678,16665215
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3,867593,893812
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5,239878,246897
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3,53180,54609
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7,428350,441467
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4,66490,68701
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3,51346,53300
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2,28348,28838
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2,2944778,3031201
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4,59104,60816


In [277]:
GDP11_13.splitColumns(5, '2012 %', '2013 %')

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1,2012,2013,2012 %,2013 %
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0,16060678,16665215,100.0,100.0
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3,867593,893812,5.4,5.4
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5,239878,246897,1.5,1.5
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3,53180,54609,0.3,0.3
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7,428350,441467,2.7,2.6
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4,66490,68701,0.4,0.4
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3,51346,53300,0.3,0.3
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2,28348,28838,0.2,0.2
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2,2944778,3031201,18.3,18.2
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4,59104,60816,0.4,0.4


In [278]:
GDP11_13.cleanStates('Area')

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1,2012,2013,2012 %,2013 %,Cleaned Area
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0,16060678,16665215,100.0,100.0,United States
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3,867593,893812,5.4,5.4,New England
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5,239878,246897,1.5,1.5,Connecticut
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3,53180,54609,0.3,0.3,Maine
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7,428350,441467,2.7,2.6,Massachusetts
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4,66490,68701,0.4,0.4,New Hampshire
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3,51346,53300,0.3,0.3,Rhode Island
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2,28348,28838,0.2,0.2,Vermont
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2,2944778,3031201,18.3,18.2,Mideast
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4,59104,60816,0.4,0.4,Delaware


In [279]:
GDP11_13.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1,2012,2013,2012 %,2013 %,Cleaned Area,Geo Loc
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0,16060678,16665215,100.0,100.0,United States,
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3,867593,893812,5.4,5.4,New England,
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5,239878,246897,1.5,1.5,Connecticut,New England
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3,53180,54609,0.3,0.3,Maine,New England
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7,428350,441467,2.7,2.6,Massachusetts,New England
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4,66490,68701,0.4,0.4,New Hampshire,New England
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3,51346,53300,0.3,0.3,Rhode Island,New England
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2,28348,28838,0.2,0.2,Vermont,New England
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2,2944778,3031201,18.3,18.2,Mideast,
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4,59104,60816,0.4,0.4,Delaware,Mideast


In [280]:
GDP11_13.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2013,Alaska,2014,57276.0,Far West
1,2013,Alabama,2014,194671.0,Southeast
2,2013,Arkansas,2014,118553.0,Southeast
3,2013,Arizona,2014,274734.0,Southwest
4,2013,California,2014,2212991.0,Far West
5,2013,Colorado,2014,288338.0,Rocky Mountain
6,2013,Connecticut,2014,246897.0,New England
7,2013,District of Columbia,2014,111695.0,Mideast
8,2013,Delaware,2014,60816.0,Mideast
9,2013,Florida,2014,800697.0,Southeast


# 2014 Quarterly Data

In [281]:
GDP2014 = tabula.read_pdf('GDP2014_quarterly.pdf', pages = 7, stream = True)
GDP2014

[                         Unnamed: 0  Unnamed: 1  Unnamed: 2  \
 0                               NaN         NaN         NaN   
 1                               NaN        2014         NaN   
 2                               NaN         NaN           I   
 3     United States1 ..............  17,316,314  16,941,430   
 4       New England ...............     924,937     911,347   
 5       Connecticut ...............     253,036     249,336   
 6       Maine .....................      55,838      54,800   
 7           Massachusetts .........     459,937     454,249   
 8           New Hampshire .........      71,552      69,842   
 9         Rhode Island ............      54,960      54,103   
 10       Vermont ..................      29,613      29,016   
 11    Mideast .....................   3,143,366   3,087,518   
 12      Delaware ..................      62,756      61,094   
 13         District of Columbia ...     115,473     115,482   
 14      Maryland ..................    

In [282]:
GDP2014 = pd.concat(GDP2014, ignore_index = True)

In [283]:
GDP2014

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Millions of dollars,Unnamed: 3,Unnamed: 4,Unnamed: 5,Percent of the U.S.,Unnamed: 6,Unnamed: 7
0,,,,Seasonally adjusted at annual rates,,,,Seasonally adjusted at annual rates,,
1,,2014.0,,2014,,2014.0,,2014,,
2,,,I,II III IV,,,I,II III,IV,
3,United States1 ..............,17316314.0,16941430,"17,225,767 17,497,215 17,600,843",,100.0,,100.0 100.0 100.0,,100.0
4,New England ...............,924937.0,911347,"919,972 929,812 938,616",,5.3,,5.4 5.3 5.3,,5.3
5,Connecticut ...............,253036.0,249336,"252,980 253,856 255,972",,1.5,,1.5 1.5 1.5,,1.5
6,Maine .....................,55838.0,54800,"55,588 56,240 56,726",,0.3,,0.3 0.3 0.3,,0.3
7,Massachusetts .........,459937.0,454249,"455,425 462,636 467,436",,2.7,,2.7 2.6 2.6,,2.7
8,New Hampshire .........,71552.0,69842,"71,895 71,929 72,542",,0.4,,0.4 0.4 0.4,,0.4
9,Rhode Island ............,54960.0,54103,"54,795 55,245 55,697",,0.3,,0.3 0.3 0.3,,0.3


In [284]:
GDP2014 = GDP2014.drop(GDP2014.index[0]).reset_index(drop = True)
GDP2014 = GDP2014.drop(GDP2014.index[0]).reset_index(drop = True)
GDP2014

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Millions of dollars,Unnamed: 3,Unnamed: 4,Unnamed: 5,Percent of the U.S.,Unnamed: 6,Unnamed: 7
0,,,I,II III IV,,,I,II III,IV,
1,United States1 ..............,17316314.0,16941430,"17,225,767 17,497,215 17,600,843",,100.0,,100.0 100.0 100.0,,100.0
2,New England ...............,924937.0,911347,"919,972 929,812 938,616",,5.3,,5.4 5.3 5.3,,5.3
3,Connecticut ...............,253036.0,249336,"252,980 253,856 255,972",,1.5,,1.5 1.5 1.5,,1.5
4,Maine .....................,55838.0,54800,"55,588 56,240 56,726",,0.3,,0.3 0.3 0.3,,0.3
5,Massachusetts .........,459937.0,454249,"455,425 462,636 467,436",,2.7,,2.7 2.6 2.6,,2.7
6,New Hampshire .........,71552.0,69842,"71,895 71,929 72,542",,0.4,,0.4 0.4 0.4,,0.4
7,Rhode Island ............,54960.0,54103,"54,795 55,245 55,697",,0.3,,0.3 0.3 0.3,,0.3
8,Vermont ..................,29613.0,29016,"29,289 29,905 30,244",,0.2,,0.2 0.2 0.2,,0.2
9,Mideast .....................,3143366.0,3087518,"3,124,845 3,172,793 3,188,307",,18.2,,18.2 18.1 18.1,,18.1


In [285]:
GDP2014 = GDP2014.drop(GDP2014.index[0]).reset_index(drop = True)
GDP2014

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Millions of dollars,Unnamed: 3,Unnamed: 4,Unnamed: 5,Percent of the U.S.,Unnamed: 6,Unnamed: 7
0,United States1 ..............,17316314,16941430,"17,225,767 17,497,215 17,600,843",,100.0,,100.0 100.0 100.0,,100.0
1,New England ...............,924937,911347,"919,972 929,812 938,616",,5.3,,5.4 5.3 5.3,,5.3
2,Connecticut ...............,253036,249336,"252,980 253,856 255,972",,1.5,,1.5 1.5 1.5,,1.5
3,Maine .....................,55838,54800,"55,588 56,240 56,726",,0.3,,0.3 0.3 0.3,,0.3
4,Massachusetts .........,459937,454249,"455,425 462,636 467,436",,2.7,,2.7 2.6 2.6,,2.7
5,New Hampshire .........,71552,69842,"71,895 71,929 72,542",,0.4,,0.4 0.4 0.4,,0.4
6,Rhode Island ............,54960,54103,"54,795 55,245 55,697",,0.3,,0.3 0.3 0.3,,0.3
7,Vermont ..................,29613,29016,"29,289 29,905 30,244",,0.2,,0.2 0.2 0.2,,0.2
8,Mideast .....................,3143366,3087518,"3,124,845 3,172,793 3,188,307",,18.2,,18.2 18.1 18.1,,18.1
9,Delaware ..................,62756,61094,"62,375 63,646 63,908",,0.4,,0.4 0.4 0.4,,0.4


In [286]:
for i in GDP2014['Millions of dollars'].index:
    splitList = GDP2014.loc[i, 'Millions of dollars'].split(" ")
    GDP2014.loc[i, '2014 Q2'] = splitList[0]
    GDP2014.loc[i, '2014 Q3'] = splitList[1]
    GDP2014.loc[i, '2014 Q4'] = splitList[2]
GDP2014

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Millions of dollars,Unnamed: 3,Unnamed: 4,Unnamed: 5,Percent of the U.S.,Unnamed: 6,Unnamed: 7,2014 Q2,2014 Q3,2014 Q4
0,United States1 ..............,17316314,16941430,"17,225,767 17,497,215 17,600,843",,100.0,,100.0 100.0 100.0,,100.0,17225767,17497215,17600843
1,New England ...............,924937,911347,"919,972 929,812 938,616",,5.3,,5.4 5.3 5.3,,5.3,919972,929812,938616
2,Connecticut ...............,253036,249336,"252,980 253,856 255,972",,1.5,,1.5 1.5 1.5,,1.5,252980,253856,255972
3,Maine .....................,55838,54800,"55,588 56,240 56,726",,0.3,,0.3 0.3 0.3,,0.3,55588,56240,56726
4,Massachusetts .........,459937,454249,"455,425 462,636 467,436",,2.7,,2.7 2.6 2.6,,2.7,455425,462636,467436
5,New Hampshire .........,71552,69842,"71,895 71,929 72,542",,0.4,,0.4 0.4 0.4,,0.4,71895,71929,72542
6,Rhode Island ............,54960,54103,"54,795 55,245 55,697",,0.3,,0.3 0.3 0.3,,0.3,54795,55245,55697
7,Vermont ..................,29613,29016,"29,289 29,905 30,244",,0.2,,0.2 0.2 0.2,,0.2,29289,29905,30244
8,Mideast .....................,3143366,3087518,"3,124,845 3,172,793 3,188,307",,18.2,,18.2 18.1 18.1,,18.1,3124845,3172793,3188307
9,Delaware ..................,62756,61094,"62,375 63,646 63,908",,0.4,,0.4 0.4 0.4,,0.4,62375,63646,63908


In [287]:
for i in GDP2014['Percent of the U.S.'].index:
    splitList = GDP2014.loc[i, 'Percent of the U.S.'].split(" ")
    GDP2014.loc[i, '2014 Q2 %'] = splitList[0]
    GDP2014.loc[i, '2014 Q3 %'] = splitList[1]
    GDP2014.loc[i, '2014 Q4 %'] = splitList[2]
GDP2014

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Millions of dollars,Unnamed: 3,Unnamed: 4,Unnamed: 5,Percent of the U.S.,Unnamed: 6,Unnamed: 7,2014 Q2,2014 Q3,2014 Q4,2014 Q2 %,2014 Q3 %,2014 Q4 %
0,United States1 ..............,17316314,16941430,"17,225,767 17,497,215 17,600,843",,100.0,,100.0 100.0 100.0,,100.0,17225767,17497215,17600843,100.0,100.0,100.0
1,New England ...............,924937,911347,"919,972 929,812 938,616",,5.3,,5.4 5.3 5.3,,5.3,919972,929812,938616,5.4,5.3,5.3
2,Connecticut ...............,253036,249336,"252,980 253,856 255,972",,1.5,,1.5 1.5 1.5,,1.5,252980,253856,255972,1.5,1.5,1.5
3,Maine .....................,55838,54800,"55,588 56,240 56,726",,0.3,,0.3 0.3 0.3,,0.3,55588,56240,56726,0.3,0.3,0.3
4,Massachusetts .........,459937,454249,"455,425 462,636 467,436",,2.7,,2.7 2.6 2.6,,2.7,455425,462636,467436,2.7,2.6,2.6
5,New Hampshire .........,71552,69842,"71,895 71,929 72,542",,0.4,,0.4 0.4 0.4,,0.4,71895,71929,72542,0.4,0.4,0.4
6,Rhode Island ............,54960,54103,"54,795 55,245 55,697",,0.3,,0.3 0.3 0.3,,0.3,54795,55245,55697,0.3,0.3,0.3
7,Vermont ..................,29613,29016,"29,289 29,905 30,244",,0.2,,0.2 0.2 0.2,,0.2,29289,29905,30244,0.2,0.2,0.2
8,Mideast .....................,3143366,3087518,"3,124,845 3,172,793 3,188,307",,18.2,,18.2 18.1 18.1,,18.1,3124845,3172793,3188307,18.2,18.1,18.1
9,Delaware ..................,62756,61094,"62,375 63,646 63,908",,0.4,,0.4 0.4 0.4,,0.4,62375,63646,63908,0.4,0.4,0.4


In [288]:
GDP2014 = GDP2014.drop(['Millions of dollars', 'Percent of the U.S.'], axis = 1)
GDP2014 = GDP2014.drop(['Unnamed: 3', 'Unnamed: 5', 'Unnamed: 6'], axis = 1)
GDP2014

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 4,Unnamed: 7,2014 Q2,2014 Q3,2014 Q4,2014 Q2 %,2014 Q3 %,2014 Q4 %
0,United States1 ..............,17316314,16941430,100.0,100.0,17225767,17497215,17600843,100.0,100.0,100.0
1,New England ...............,924937,911347,5.3,5.3,919972,929812,938616,5.4,5.3,5.3
2,Connecticut ...............,253036,249336,1.5,1.5,252980,253856,255972,1.5,1.5,1.5
3,Maine .....................,55838,54800,0.3,0.3,55588,56240,56726,0.3,0.3,0.3
4,Massachusetts .........,459937,454249,2.7,2.7,455425,462636,467436,2.7,2.6,2.6
5,New Hampshire .........,71552,69842,0.4,0.4,71895,71929,72542,0.4,0.4,0.4
6,Rhode Island ............,54960,54103,0.3,0.3,54795,55245,55697,0.3,0.3,0.3
7,Vermont ..................,29613,29016,0.2,0.2,29289,29905,30244,0.2,0.2,0.2
8,Mideast .....................,3143366,3087518,18.2,18.1,3124845,3172793,3188307,18.2,18.1,18.1
9,Delaware ..................,62756,61094,0.4,0.4,62375,63646,63908,0.4,0.4,0.4


In [289]:
for column in GDP2014.columns:
    if column == 'Unnamed: 0':
        pass
    else:
        for i in GDP2014[column].index:
            if isinstance(GDP2014.loc[i, column], float):
                pass
            else:
                GDP2014.loc[i, column] = GDP2014.loc[i, column].replace(',','')
GDP2014 = GDP2014.apply(pd.to_numeric, errors = 'ignore')
GDP2014

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 4,Unnamed: 7,2014 Q2,2014 Q3,2014 Q4,2014 Q2 %,2014 Q3 %,2014 Q4 %
0,United States1 ..............,17316314,16941430,100.0,100.0,17225767,17497215,17600843,100.0,100.0,100.0
1,New England ...............,924937,911347,5.3,5.3,919972,929812,938616,5.4,5.3,5.3
2,Connecticut ...............,253036,249336,1.5,1.5,252980,253856,255972,1.5,1.5,1.5
3,Maine .....................,55838,54800,0.3,0.3,55588,56240,56726,0.3,0.3,0.3
4,Massachusetts .........,459937,454249,2.7,2.7,455425,462636,467436,2.7,2.6,2.6
5,New Hampshire .........,71552,69842,0.4,0.4,71895,71929,72542,0.4,0.4,0.4
6,Rhode Island ............,54960,54103,0.3,0.3,54795,55245,55697,0.3,0.3,0.3
7,Vermont ..................,29613,29016,0.2,0.2,29289,29905,30244,0.2,0.2,0.2
8,Mideast .....................,3143366,3087518,18.2,18.1,3124845,3172793,3188307,18.2,18.1,18.1
9,Delaware ..................,62756,61094,0.4,0.4,62375,63646,63908,0.4,0.4,0.4


In [290]:
GDP2014.columns = ['Area', '2014', '2014 Q1', '2014 %', '2014 Q1 %', '2014 Q2', '2014 Q3', '2014 Q4',
                 '2014 Q2 %', '2014 Q3 %', '2014 Q4%']
GDP2014

Unnamed: 0,Area,2014,2014 Q1,2014 %,2014 Q1 %,2014 Q2,2014 Q3,2014 Q4,2014 Q2 %,2014 Q3 %,2014 Q4%
0,United States1 ..............,17316314,16941430,100.0,100.0,17225767,17497215,17600843,100.0,100.0,100.0
1,New England ...............,924937,911347,5.3,5.3,919972,929812,938616,5.4,5.3,5.3
2,Connecticut ...............,253036,249336,1.5,1.5,252980,253856,255972,1.5,1.5,1.5
3,Maine .....................,55838,54800,0.3,0.3,55588,56240,56726,0.3,0.3,0.3
4,Massachusetts .........,459937,454249,2.7,2.7,455425,462636,467436,2.7,2.6,2.6
5,New Hampshire .........,71552,69842,0.4,0.4,71895,71929,72542,0.4,0.4,0.4
6,Rhode Island ............,54960,54103,0.3,0.3,54795,55245,55697,0.3,0.3,0.3
7,Vermont ..................,29613,29016,0.2,0.2,29289,29905,30244,0.2,0.2,0.2
8,Mideast .....................,3143366,3087518,18.2,18.1,3124845,3172793,3188307,18.2,18.1,18.1
9,Delaware ..................,62756,61094,0.4,0.4,62375,63646,63908,0.4,0.4,0.4


In [291]:
GDP_2014 = QuarterlyGDP('GDP2014_quarterly.pdf', 7, 2014, 2014, '2014')
GDP_2014.df = GDP2014
GDP_2014.df

Unnamed: 0,Area,2014,2014 Q1,2014 %,2014 Q1 %,2014 Q2,2014 Q3,2014 Q4,2014 Q2 %,2014 Q3 %,2014 Q4%
0,United States1 ..............,17316314,16941430,100.0,100.0,17225767,17497215,17600843,100.0,100.0,100.0
1,New England ...............,924937,911347,5.3,5.3,919972,929812,938616,5.4,5.3,5.3
2,Connecticut ...............,253036,249336,1.5,1.5,252980,253856,255972,1.5,1.5,1.5
3,Maine .....................,55838,54800,0.3,0.3,55588,56240,56726,0.3,0.3,0.3
4,Massachusetts .........,459937,454249,2.7,2.7,455425,462636,467436,2.7,2.6,2.6
5,New Hampshire .........,71552,69842,0.4,0.4,71895,71929,72542,0.4,0.4,0.4
6,Rhode Island ............,54960,54103,0.3,0.3,54795,55245,55697,0.3,0.3,0.3
7,Vermont ..................,29613,29016,0.2,0.2,29289,29905,30244,0.2,0.2,0.2
8,Mideast .....................,3143366,3087518,18.2,18.1,3124845,3172793,3188307,18.2,18.1,18.1
9,Delaware ..................,62756,61094,0.4,0.4,62375,63646,63908,0.4,0.4,0.4


In [292]:
GDP_2014.cleanStates()

Unnamed: 0,Area,2014,2014 Q1,2014 %,2014 Q1 %,2014 Q2,2014 Q3,2014 Q4,2014 Q2 %,2014 Q3 %,2014 Q4%,Cleaned Area
0,United States1 ..............,17316314,16941430,100.0,100.0,17225767,17497215,17600843,100.0,100.0,100.0,United States
1,New England ...............,924937,911347,5.3,5.3,919972,929812,938616,5.4,5.3,5.3,New England
2,Connecticut ...............,253036,249336,1.5,1.5,252980,253856,255972,1.5,1.5,1.5,Connecticut
3,Maine .....................,55838,54800,0.3,0.3,55588,56240,56726,0.3,0.3,0.3,Maine
4,Massachusetts .........,459937,454249,2.7,2.7,455425,462636,467436,2.7,2.6,2.6,Massachusetts
5,New Hampshire .........,71552,69842,0.4,0.4,71895,71929,72542,0.4,0.4,0.4,New Hampshire
6,Rhode Island ............,54960,54103,0.3,0.3,54795,55245,55697,0.3,0.3,0.3,Rhode Island
7,Vermont ..................,29613,29016,0.2,0.2,29289,29905,30244,0.2,0.2,0.2,Vermont
8,Mideast .....................,3143366,3087518,18.2,18.1,3124845,3172793,3188307,18.2,18.1,18.1,Mideast
9,Delaware ..................,62756,61094,0.4,0.4,62375,63646,63908,0.4,0.4,0.4,Delaware


In [293]:
GDP_2014.addGeoLocColumn()

Unnamed: 0,Area,2014,2014 Q1,2014 %,2014 Q1 %,2014 Q2,2014 Q3,2014 Q4,2014 Q2 %,2014 Q3 %,2014 Q4%,Cleaned Area,Geo Loc
0,United States1 ..............,17316314,16941430,100.0,100.0,17225767,17497215,17600843,100.0,100.0,100.0,United States,
1,New England ...............,924937,911347,5.3,5.3,919972,929812,938616,5.4,5.3,5.3,New England,
2,Connecticut ...............,253036,249336,1.5,1.5,252980,253856,255972,1.5,1.5,1.5,Connecticut,New England
3,Maine .....................,55838,54800,0.3,0.3,55588,56240,56726,0.3,0.3,0.3,Maine,New England
4,Massachusetts .........,459937,454249,2.7,2.7,455425,462636,467436,2.7,2.6,2.6,Massachusetts,New England
5,New Hampshire .........,71552,69842,0.4,0.4,71895,71929,72542,0.4,0.4,0.4,New Hampshire,New England
6,Rhode Island ............,54960,54103,0.3,0.3,54795,55245,55697,0.3,0.3,0.3,Rhode Island,New England
7,Vermont ..................,29613,29016,0.2,0.2,29289,29905,30244,0.2,0.2,0.2,Vermont,New England
8,Mideast .....................,3143366,3087518,18.2,18.1,3124845,3172793,3188307,18.2,18.1,18.1,Mideast,
9,Delaware ..................,62756,61094,0.4,0.4,62375,63646,63908,0.4,0.4,0.4,Delaware,Mideast


In [294]:
states = ["Alaska", "Alabama", "Arkansas", "Arizona", "California", "Colorado", "Connecticut", "District of Columbia",
          "Delaware", "Florida", "Georgia", "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana", "Kansas", "Kentucky", 
          "Louisiana", "Massachusetts", "Maryland", "Maine", "Michigan", "Minnesota", "Missouri", "Mississippi", "Montana", 
          "North Carolina", "North Dakota", "Nebraska", "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York", 
          "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", 
          "Texas", "Utah", "Virginia", "Vermont", "Washington", "Wisconsin", "West Virginia", "Wyoming"]
        
        # empty DataFrame to hold the new columns
GDPdf = pd.DataFrame()
        # count variable to increase the index by one after each row is added
count = 0
        
        # for loop to iterate through the years specified by the instance
for year in range(GDP_2014.yearStart, GDP_2014.yearEnd + 1):
            # for each state in that year, add in the following columns
    for state in states:
        GDPdf.loc[count, 'year'] = str(year)
        GDPdf.loc[count, 'state'] = state
        GDPdf.loc[count, 'current dollars'] = GDP_2014.dollars
        GDPdf.loc[count, 'GDP'] = GDP_2014.df.loc[GDP_2014.df['Cleaned Area'] == state, str(year)].item()
        GDPdf.loc[count, 'GDP_area'] = GDP_2014.df.loc[GDP_2014.df['Cleaned Area'] == state, 'Geo Loc'].item()
        count += 1

GDP_2014.df = GDPdf
GDP_2014.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2014,Alaska,2014,57080.0,Far West
1,2014,Alabama,2014,199440.0,Southeast
2,2014,Arkansas,2014,121395.0,Southeast
3,2014,Arizona,2014,284156.0,Southwest
4,2014,California,2014,2311616.0,Far West
5,2014,Colorado,2014,306663.0,Rocky Mountain
6,2014,Connecticut,2014,253036.0,New England
7,2014,District of Columbia,2014,115473.0,Mideast
8,2014,Delaware,2014,62756.0,Mideast
9,2014,Florida,2014,839944.0,Southeast


# 2015 - 2016 Quarterly Data

In [295]:
GDP15_16 = tabula.read_pdf('GDP2015-2016_quarterly.pdf', pages = 10, stream = True)
GDP15_16

[                           Unnamed: 0                            Unnamed: 1  \
 0                                 NaN                                   NaN   
 1                                 NaN                                   NaN   
 2                                 NaN                                    Q1   
 3                                   1                                   NaN   
 4                       United States  ..........................17,671,025   
 5   New England .....................                               941,945   
 6        Connecticut ................                               250,628   
 7      Maine ........................                                55,930   
 8          Massachusetts ............                               478,320   
 9           New Hampshire ...........                                72,395   
 10        Rhode Island ..............                                54,858   
 11      Vermont .....................  

In [296]:
GDP15_16 = pd.concat(GDP15_16, ignore_index = True)
GDP15_16

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Percent of the U.S.,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17
0,,,,,,Seasonally adjusted at annual rates,,,,,,,,,,,,,
1,,,,2015.0,,,,2016.0,,,,,2015.0,,,,2016.0,,
2,,Q1,Q2,,Q3,Q4 Q1,Q2,,Q3,Q4,Q1,Q2,,Q3,Q4 Q1,Q2,,Q3,Q4
3,1,,,,,,,,,,,,,,,,,,
4,United States,"..........................17,671,025",17886695,,18030967,"18,111,878 18,170,091",18337791,,18561915,18755370,100.0,100.0,,100.0,100.0 100.0,100.0,,100.0,100.0
5,New England .....................,941945,959027,,964703,"982,152 983,200",990080,,1002400,1012104,5.3,,5.4,5.4,5.4 5.4,,5.4,5.4,5.4
6,Connecticut ................,250628,256514,,257895,"260,185 260,604",262341,,263973,266597,1.4,,1.4,1.4,1.4 1.4,,1.4,1.4,1.4
7,Maine ........................,55930,57230,,57716,"58,453 58,540",59224,,59478,59857,0.3,,0.3,0.3,0.3 0.3,,0.3,0.3,0.3
8,Massachusetts ............,478320,485938,,487509,"500,634 500,418",503357,,511392,516483,2.7,,2.7,2.7,2.8 2.8,,2.7,2.8,2.8
9,New Hampshire ...........,72395,73810,,75038,"75,833 76,737",77201,,78329,79155,0.4,,0.4,0.4,0.4 0.4,,0.4,0.4,0.4


In [297]:
GDP15_16 = GDP15_16.drop(GDP15_16.index[0:4]).reset_index(drop = True)

In [298]:
GDP15_16

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Percent of the U.S.,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17
0,United States,"..........................17,671,025",17886695,,18030967,"18,111,878 18,170,091",18337791,,18561915,18755370,100.0,100.0,,100.0,100.0 100.0,100.0,,100.0,100.0
1,New England .....................,941945,959027,,964703,"982,152 983,200",990080,,1002400,1012104,5.3,,5.4,5.4,5.4 5.4,,5.4,5.4,5.4
2,Connecticut ................,250628,256514,,257895,"260,185 260,604",262341,,263973,266597,1.4,,1.4,1.4,1.4 1.4,,1.4,1.4,1.4
3,Maine ........................,55930,57230,,57716,"58,453 58,540",59224,,59478,59857,0.3,,0.3,0.3,0.3 0.3,,0.3,0.3,0.3
4,Massachusetts ............,478320,485938,,487509,"500,634 500,418",503357,,511392,516483,2.7,,2.7,2.7,2.8 2.8,,2.7,2.8,2.8
5,New Hampshire ...........,72395,73810,,75038,"75,833 76,737",77201,,78329,79155,0.4,,0.4,0.4,0.4 0.4,,0.4,0.4,0.4
6,Rhode Island ..............,54858,55401,,55884,"56,457 56,087",57099,,58017,58527,0.3,,0.3,0.3,0.3 0.3,,0.3,0.3,0.3
7,Vermont .....................,29815,30135,,30660,"30,591 30,814",30859,,31211,31483,0.2,,0.2,0.2,0.2 0.2,,0.2,0.2,0.2
8,Mideast .......................,3223326,3270682,,3302756,"3,301,626 3,329,683",3358622,,3381370,3408478,18.2,18.3,,18.3,18.2 18.3,18.3,,18.2,18.2
9,Delaware ....................,68216,68898,,69086,"69,256 68,952",69747,,71080,71769,0.4,,0.4,0.4,0.4 0.4,,0.4,0.4,0.4


In [299]:
GDP15_16 = GDP15_16.drop(['Unnamed: 3', 'Unnamed: 7','Unnamed: 11', 'Unnamed: 14'], axis = 1)
GDP15_16

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 12,Unnamed: 13,Percent of the U.S.,Unnamed: 15,Unnamed: 16,Unnamed: 17
0,United States,"..........................17,671,025",17886695,18030967,"18,111,878 18,170,091",18337791,18561915,18755370,100.0,,100.0,100.0 100.0,,100.0,100.0
1,New England .....................,941945,959027,964703,"982,152 983,200",990080,1002400,1012104,5.3,5.4,5.4,5.4 5.4,5.4,5.4,5.4
2,Connecticut ................,250628,256514,257895,"260,185 260,604",262341,263973,266597,1.4,1.4,1.4,1.4 1.4,1.4,1.4,1.4
3,Maine ........................,55930,57230,57716,"58,453 58,540",59224,59478,59857,0.3,0.3,0.3,0.3 0.3,0.3,0.3,0.3
4,Massachusetts ............,478320,485938,487509,"500,634 500,418",503357,511392,516483,2.7,2.7,2.7,2.8 2.8,2.7,2.8,2.8
5,New Hampshire ...........,72395,73810,75038,"75,833 76,737",77201,78329,79155,0.4,0.4,0.4,0.4 0.4,0.4,0.4,0.4
6,Rhode Island ..............,54858,55401,55884,"56,457 56,087",57099,58017,58527,0.3,0.3,0.3,0.3 0.3,0.3,0.3,0.3
7,Vermont .....................,29815,30135,30660,"30,591 30,814",30859,31211,31483,0.2,0.2,0.2,0.2 0.2,0.2,0.2,0.2
8,Mideast .......................,3223326,3270682,3302756,"3,301,626 3,329,683",3358622,3381370,3408478,18.2,,18.3,18.2 18.3,,18.2,18.2
9,Delaware ....................,68216,68898,69086,"69,256 68,952",69747,71080,71769,0.4,0.4,0.4,0.4 0.4,0.4,0.4,0.4


In [300]:
for i in GDP15_16['Unnamed: 5'].index:
    splitList = GDP15_16.loc[i, 'Unnamed: 5'].split(" ")
    GDP15_16.loc[i, '2015 Q4'] = splitList[0]
    GDP15_16.loc[i, '2016 Q1'] = splitList[1]
GDP15_16

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 12,Unnamed: 13,Percent of the U.S.,Unnamed: 15,Unnamed: 16,Unnamed: 17,2015 Q4,2016 Q1
0,United States,"..........................17,671,025",17886695,18030967,"18,111,878 18,170,091",18337791,18561915,18755370,100.0,,100.0,100.0 100.0,,100.0,100.0,18111878,18170091
1,New England .....................,941945,959027,964703,"982,152 983,200",990080,1002400,1012104,5.3,5.4,5.4,5.4 5.4,5.4,5.4,5.4,982152,983200
2,Connecticut ................,250628,256514,257895,"260,185 260,604",262341,263973,266597,1.4,1.4,1.4,1.4 1.4,1.4,1.4,1.4,260185,260604
3,Maine ........................,55930,57230,57716,"58,453 58,540",59224,59478,59857,0.3,0.3,0.3,0.3 0.3,0.3,0.3,0.3,58453,58540
4,Massachusetts ............,478320,485938,487509,"500,634 500,418",503357,511392,516483,2.7,2.7,2.7,2.8 2.8,2.7,2.8,2.8,500634,500418
5,New Hampshire ...........,72395,73810,75038,"75,833 76,737",77201,78329,79155,0.4,0.4,0.4,0.4 0.4,0.4,0.4,0.4,75833,76737
6,Rhode Island ..............,54858,55401,55884,"56,457 56,087",57099,58017,58527,0.3,0.3,0.3,0.3 0.3,0.3,0.3,0.3,56457,56087
7,Vermont .....................,29815,30135,30660,"30,591 30,814",30859,31211,31483,0.2,0.2,0.2,0.2 0.2,0.2,0.2,0.2,30591,30814
8,Mideast .......................,3223326,3270682,3302756,"3,301,626 3,329,683",3358622,3381370,3408478,18.2,,18.3,18.2 18.3,,18.2,18.2,3301626,3329683
9,Delaware ....................,68216,68898,69086,"69,256 68,952",69747,71080,71769,0.4,0.4,0.4,0.4 0.4,0.4,0.4,0.4,69256,68952


In [301]:
for i in GDP15_16['Percent of the U.S.'].index:
    splitList = GDP15_16.loc[i, 'Percent of the U.S.'].split(" ")
    GDP15_16.loc[i, '2015 Q4 %'] = splitList[0]
    GDP15_16.loc[i, '2016 Q1 %'] = splitList[1]
GDP15_16

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 12,Unnamed: 13,Percent of the U.S.,Unnamed: 15,Unnamed: 16,Unnamed: 17,2015 Q4,2016 Q1,2015 Q4 %,2016 Q1 %
0,United States,"..........................17,671,025",17886695,18030967,"18,111,878 18,170,091",18337791,18561915,18755370,100.0,,100.0,100.0 100.0,,100.0,100.0,18111878,18170091,100.0,100.0
1,New England .....................,941945,959027,964703,"982,152 983,200",990080,1002400,1012104,5.3,5.4,5.4,5.4 5.4,5.4,5.4,5.4,982152,983200,5.4,5.4
2,Connecticut ................,250628,256514,257895,"260,185 260,604",262341,263973,266597,1.4,1.4,1.4,1.4 1.4,1.4,1.4,1.4,260185,260604,1.4,1.4
3,Maine ........................,55930,57230,57716,"58,453 58,540",59224,59478,59857,0.3,0.3,0.3,0.3 0.3,0.3,0.3,0.3,58453,58540,0.3,0.3
4,Massachusetts ............,478320,485938,487509,"500,634 500,418",503357,511392,516483,2.7,2.7,2.7,2.8 2.8,2.7,2.8,2.8,500634,500418,2.8,2.8
5,New Hampshire ...........,72395,73810,75038,"75,833 76,737",77201,78329,79155,0.4,0.4,0.4,0.4 0.4,0.4,0.4,0.4,75833,76737,0.4,0.4
6,Rhode Island ..............,54858,55401,55884,"56,457 56,087",57099,58017,58527,0.3,0.3,0.3,0.3 0.3,0.3,0.3,0.3,56457,56087,0.3,0.3
7,Vermont .....................,29815,30135,30660,"30,591 30,814",30859,31211,31483,0.2,0.2,0.2,0.2 0.2,0.2,0.2,0.2,30591,30814,0.2,0.2
8,Mideast .......................,3223326,3270682,3302756,"3,301,626 3,329,683",3358622,3381370,3408478,18.2,,18.3,18.2 18.3,,18.2,18.2,3301626,3329683,18.2,18.3
9,Delaware ....................,68216,68898,69086,"69,256 68,952",69747,71080,71769,0.4,0.4,0.4,0.4 0.4,0.4,0.4,0.4,69256,68952,0.4,0.4


In [302]:
GDP15_16 = GDP15_16.drop('Percent of the U.S.', axis = 1)
GDP15_16 = GDP15_16.drop('Unnamed: 5', axis = 1)
GDP15_16

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 4,Unnamed: 6,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 12,Unnamed: 13,Unnamed: 15,Unnamed: 16,Unnamed: 17,2015 Q4,2016 Q1,2015 Q4 %,2016 Q1 %
0,United States,"..........................17,671,025",17886695,18030967,18337791,18561915,18755370,100.0,,100.0,,100.0,100.0,18111878,18170091,100.0,100.0
1,New England .....................,941945,959027,964703,990080,1002400,1012104,5.3,5.4,5.4,5.4,5.4,5.4,982152,983200,5.4,5.4
2,Connecticut ................,250628,256514,257895,262341,263973,266597,1.4,1.4,1.4,1.4,1.4,1.4,260185,260604,1.4,1.4
3,Maine ........................,55930,57230,57716,59224,59478,59857,0.3,0.3,0.3,0.3,0.3,0.3,58453,58540,0.3,0.3
4,Massachusetts ............,478320,485938,487509,503357,511392,516483,2.7,2.7,2.7,2.7,2.8,2.8,500634,500418,2.8,2.8
5,New Hampshire ...........,72395,73810,75038,77201,78329,79155,0.4,0.4,0.4,0.4,0.4,0.4,75833,76737,0.4,0.4
6,Rhode Island ..............,54858,55401,55884,57099,58017,58527,0.3,0.3,0.3,0.3,0.3,0.3,56457,56087,0.3,0.3
7,Vermont .....................,29815,30135,30660,30859,31211,31483,0.2,0.2,0.2,0.2,0.2,0.2,30591,30814,0.2,0.2
8,Mideast .......................,3223326,3270682,3302756,3358622,3381370,3408478,18.2,,18.3,,18.2,18.2,3301626,3329683,18.2,18.3
9,Delaware ....................,68216,68898,69086,69747,71080,71769,0.4,0.4,0.4,0.4,0.4,0.4,69256,68952,0.4,0.4


In [303]:
GDP15_16.columns = ['Area', '2015 Q1', '2015 Q2', '2015 Q3', '2016 Q2',
                 '2016 Q3', '2016 Q4','2015 Q1 %', '2015 Q2 %', '2015 Q3 %', '2016 Q2 %',
                 '2016 Q3 %', '2016 Q4 %', '2015 Q4', '2016 Q1', '2015 Q4 %', '2016 Q1 %']
GDP15_16

Unnamed: 0,Area,2015 Q1,2015 Q2,2015 Q3,2016 Q2,2016 Q3,2016 Q4,2015 Q1 %,2015 Q2 %,2015 Q3 %,2016 Q2 %,2016 Q3 %,2016 Q4 %,2015 Q4,2016 Q1,2015 Q4 %,2016 Q1 %
0,United States,"..........................17,671,025",17886695,18030967,18337791,18561915,18755370,100.0,,100.0,,100.0,100.0,18111878,18170091,100.0,100.0
1,New England .....................,941945,959027,964703,990080,1002400,1012104,5.3,5.4,5.4,5.4,5.4,5.4,982152,983200,5.4,5.4
2,Connecticut ................,250628,256514,257895,262341,263973,266597,1.4,1.4,1.4,1.4,1.4,1.4,260185,260604,1.4,1.4
3,Maine ........................,55930,57230,57716,59224,59478,59857,0.3,0.3,0.3,0.3,0.3,0.3,58453,58540,0.3,0.3
4,Massachusetts ............,478320,485938,487509,503357,511392,516483,2.7,2.7,2.7,2.7,2.8,2.8,500634,500418,2.8,2.8
5,New Hampshire ...........,72395,73810,75038,77201,78329,79155,0.4,0.4,0.4,0.4,0.4,0.4,75833,76737,0.4,0.4
6,Rhode Island ..............,54858,55401,55884,57099,58017,58527,0.3,0.3,0.3,0.3,0.3,0.3,56457,56087,0.3,0.3
7,Vermont .....................,29815,30135,30660,30859,31211,31483,0.2,0.2,0.2,0.2,0.2,0.2,30591,30814,0.2,0.2
8,Mideast .......................,3223326,3270682,3302756,3358622,3381370,3408478,18.2,,18.3,,18.2,18.2,3301626,3329683,18.2,18.3
9,Delaware ....................,68216,68898,69086,69747,71080,71769,0.4,0.4,0.4,0.4,0.4,0.4,69256,68952,0.4,0.4


In [304]:
for column in GDP15_16.columns:
    if column == 'Area':
        pass
    else:
        for i in GDP15_16[column].index:
            if isinstance(GDP15_16.loc[i, column], float):
                pass
            else:
                GDP15_16.loc[i, column] = GDP15_16.loc[i, column].replace(',','')
GDP15_16 = GDP15_16.apply(pd.to_numeric, errors = 'ignore')
GDP15_16

Unnamed: 0,Area,2015 Q1,2015 Q2,2015 Q3,2016 Q2,2016 Q3,2016 Q4,2015 Q1 %,2015 Q2 %,2015 Q3 %,2016 Q2 %,2016 Q3 %,2016 Q4 %,2015 Q4,2016 Q1,2015 Q4 %,2016 Q1 %
0,United States,..........................17671025,17886695,18030967,18337791,18561915,18755370,100.0,,100.0,,100.0,100.0,18111878,18170091,100.0,100.0
1,New England .....................,941945,959027,964703,990080,1002400,1012104,5.3,5.4,5.4,5.4,5.4,5.4,982152,983200,5.4,5.4
2,Connecticut ................,250628,256514,257895,262341,263973,266597,1.4,1.4,1.4,1.4,1.4,1.4,260185,260604,1.4,1.4
3,Maine ........................,55930,57230,57716,59224,59478,59857,0.3,0.3,0.3,0.3,0.3,0.3,58453,58540,0.3,0.3
4,Massachusetts ............,478320,485938,487509,503357,511392,516483,2.7,2.7,2.7,2.7,2.8,2.8,500634,500418,2.8,2.8
5,New Hampshire ...........,72395,73810,75038,77201,78329,79155,0.4,0.4,0.4,0.4,0.4,0.4,75833,76737,0.4,0.4
6,Rhode Island ..............,54858,55401,55884,57099,58017,58527,0.3,0.3,0.3,0.3,0.3,0.3,56457,56087,0.3,0.3
7,Vermont .....................,29815,30135,30660,30859,31211,31483,0.2,0.2,0.2,0.2,0.2,0.2,30591,30814,0.2,0.2
8,Mideast .......................,3223326,3270682,3302756,3358622,3381370,3408478,18.2,,18.3,,18.2,18.2,3301626,3329683,18.2,18.3
9,Delaware ....................,68216,68898,69086,69747,71080,71769,0.4,0.4,0.4,0.4,0.4,0.4,69256,68952,0.4,0.4


In [305]:
gdp2015 = ['2015 Q1', '2015 Q2', '2015 Q3', '2015 Q4']
gdp2016 = ['2016 Q1', '2016 Q2', '2016 Q3', '2016 Q4']

filter_2015 = GDP15_16.filter(items = gdp2015)
filter_2016 = GDP15_16.filter(items = gdp2016)

GDP15_16['2015'] = filter_2015.mean(axis = 1)
GDP15_16['2016'] = filter_2016.mean(axis = 1)

GDP15_16

Unnamed: 0,Area,2015 Q1,2015 Q2,2015 Q3,2016 Q2,2016 Q3,2016 Q4,2015 Q1 %,2015 Q2 %,2015 Q3 %,2016 Q2 %,2016 Q3 %,2016 Q4 %,2015 Q4,2016 Q1,2015 Q4 %,2016 Q1 %,2015,2016
0,United States,..........................17671025,17886695,18030967,18337791,18561915,18755370,100.0,,100.0,,100.0,100.0,18111878,18170091,100.0,100.0,18009850.0,18456291.75
1,New England .....................,941945,959027,964703,990080,1002400,1012104,5.3,5.4,5.4,5.4,5.4,5.4,982152,983200,5.4,5.4,968627.3,996946.0
2,Connecticut ................,250628,256514,257895,262341,263973,266597,1.4,1.4,1.4,1.4,1.4,1.4,260185,260604,1.4,1.4,258198.0,263378.75
3,Maine ........................,55930,57230,57716,59224,59478,59857,0.3,0.3,0.3,0.3,0.3,0.3,58453,58540,0.3,0.3,57799.67,59274.75
4,Massachusetts ............,478320,485938,487509,503357,511392,516483,2.7,2.7,2.7,2.7,2.8,2.8,500634,500418,2.8,2.8,491360.3,507912.5
5,New Hampshire ...........,72395,73810,75038,77201,78329,79155,0.4,0.4,0.4,0.4,0.4,0.4,75833,76737,0.4,0.4,74893.67,77855.5
6,Rhode Island ..............,54858,55401,55884,57099,58017,58527,0.3,0.3,0.3,0.3,0.3,0.3,56457,56087,0.3,0.3,55914.0,57432.5
7,Vermont .....................,29815,30135,30660,30859,31211,31483,0.2,0.2,0.2,0.2,0.2,0.2,30591,30814,0.2,0.2,30462.0,31091.75
8,Mideast .......................,3223326,3270682,3302756,3358622,3381370,3408478,18.2,,18.3,,18.2,18.2,3301626,3329683,18.2,18.3,3291688.0,3369538.25
9,Delaware ....................,68216,68898,69086,69747,71080,71769,0.4,0.4,0.4,0.4,0.4,0.4,69256,68952,0.4,0.4,69080.0,70387.0


In [306]:
GDP2015_2016 = QuarterlyGDP('GDP2015-2016_quarterly.pdf', 10, 2015, 2016, '2016')

In [307]:
GDP2015_2016.df = GDP15_16

In [308]:
GDP2015_2016.df

Unnamed: 0,Area,2015 Q1,2015 Q2,2015 Q3,2016 Q2,2016 Q3,2016 Q4,2015 Q1 %,2015 Q2 %,2015 Q3 %,2016 Q2 %,2016 Q3 %,2016 Q4 %,2015 Q4,2016 Q1,2015 Q4 %,2016 Q1 %,2015,2016
0,United States,..........................17671025,17886695,18030967,18337791,18561915,18755370,100.0,,100.0,,100.0,100.0,18111878,18170091,100.0,100.0,18009850.0,18456291.75
1,New England .....................,941945,959027,964703,990080,1002400,1012104,5.3,5.4,5.4,5.4,5.4,5.4,982152,983200,5.4,5.4,968627.3,996946.0
2,Connecticut ................,250628,256514,257895,262341,263973,266597,1.4,1.4,1.4,1.4,1.4,1.4,260185,260604,1.4,1.4,258198.0,263378.75
3,Maine ........................,55930,57230,57716,59224,59478,59857,0.3,0.3,0.3,0.3,0.3,0.3,58453,58540,0.3,0.3,57799.67,59274.75
4,Massachusetts ............,478320,485938,487509,503357,511392,516483,2.7,2.7,2.7,2.7,2.8,2.8,500634,500418,2.8,2.8,491360.3,507912.5
5,New Hampshire ...........,72395,73810,75038,77201,78329,79155,0.4,0.4,0.4,0.4,0.4,0.4,75833,76737,0.4,0.4,74893.67,77855.5
6,Rhode Island ..............,54858,55401,55884,57099,58017,58527,0.3,0.3,0.3,0.3,0.3,0.3,56457,56087,0.3,0.3,55914.0,57432.5
7,Vermont .....................,29815,30135,30660,30859,31211,31483,0.2,0.2,0.2,0.2,0.2,0.2,30591,30814,0.2,0.2,30462.0,31091.75
8,Mideast .......................,3223326,3270682,3302756,3358622,3381370,3408478,18.2,,18.3,,18.2,18.2,3301626,3329683,18.2,18.3,3291688.0,3369538.25
9,Delaware ....................,68216,68898,69086,69747,71080,71769,0.4,0.4,0.4,0.4,0.4,0.4,69256,68952,0.4,0.4,69080.0,70387.0


In [309]:
GDP2015_2016.cleanStates()

Unnamed: 0,Area,2015 Q1,2015 Q2,2015 Q3,2016 Q2,2016 Q3,2016 Q4,2015 Q1 %,2015 Q2 %,2015 Q3 %,2016 Q2 %,2016 Q3 %,2016 Q4 %,2015 Q4,2016 Q1,2015 Q4 %,2016 Q1 %,2015,2016,Cleaned Area
0,United States,..........................17671025,17886695,18030967,18337791,18561915,18755370,100.0,,100.0,,100.0,100.0,18111878,18170091,100.0,100.0,18009850.0,18456291.75,United States
1,New England .....................,941945,959027,964703,990080,1002400,1012104,5.3,5.4,5.4,5.4,5.4,5.4,982152,983200,5.4,5.4,968627.3,996946.0,New England
2,Connecticut ................,250628,256514,257895,262341,263973,266597,1.4,1.4,1.4,1.4,1.4,1.4,260185,260604,1.4,1.4,258198.0,263378.75,Connecticut
3,Maine ........................,55930,57230,57716,59224,59478,59857,0.3,0.3,0.3,0.3,0.3,0.3,58453,58540,0.3,0.3,57799.67,59274.75,Maine
4,Massachusetts ............,478320,485938,487509,503357,511392,516483,2.7,2.7,2.7,2.7,2.8,2.8,500634,500418,2.8,2.8,491360.3,507912.5,Massachusetts
5,New Hampshire ...........,72395,73810,75038,77201,78329,79155,0.4,0.4,0.4,0.4,0.4,0.4,75833,76737,0.4,0.4,74893.67,77855.5,New Hampshire
6,Rhode Island ..............,54858,55401,55884,57099,58017,58527,0.3,0.3,0.3,0.3,0.3,0.3,56457,56087,0.3,0.3,55914.0,57432.5,Rhode Island
7,Vermont .....................,29815,30135,30660,30859,31211,31483,0.2,0.2,0.2,0.2,0.2,0.2,30591,30814,0.2,0.2,30462.0,31091.75,Vermont
8,Mideast .......................,3223326,3270682,3302756,3358622,3381370,3408478,18.2,,18.3,,18.2,18.2,3301626,3329683,18.2,18.3,3291688.0,3369538.25,Mideast
9,Delaware ....................,68216,68898,69086,69747,71080,71769,0.4,0.4,0.4,0.4,0.4,0.4,69256,68952,0.4,0.4,69080.0,70387.0,Delaware


In [310]:
GDP2015_2016.addGeoLocColumn()

Unnamed: 0,Area,2015 Q1,2015 Q2,2015 Q3,2016 Q2,2016 Q3,2016 Q4,2015 Q1 %,2015 Q2 %,2015 Q3 %,2016 Q2 %,2016 Q3 %,2016 Q4 %,2015 Q4,2016 Q1,2015 Q4 %,2016 Q1 %,2015,2016,Cleaned Area,Geo Loc
0,United States,..........................17671025,17886695,18030967,18337791,18561915,18755370,100.0,,100.0,,100.0,100.0,18111878,18170091,100.0,100.0,18009850.0,18456291.75,United States,
1,New England .....................,941945,959027,964703,990080,1002400,1012104,5.3,5.4,5.4,5.4,5.4,5.4,982152,983200,5.4,5.4,968627.3,996946.0,New England,
2,Connecticut ................,250628,256514,257895,262341,263973,266597,1.4,1.4,1.4,1.4,1.4,1.4,260185,260604,1.4,1.4,258198.0,263378.75,Connecticut,New England
3,Maine ........................,55930,57230,57716,59224,59478,59857,0.3,0.3,0.3,0.3,0.3,0.3,58453,58540,0.3,0.3,57799.67,59274.75,Maine,New England
4,Massachusetts ............,478320,485938,487509,503357,511392,516483,2.7,2.7,2.7,2.7,2.8,2.8,500634,500418,2.8,2.8,491360.3,507912.5,Massachusetts,New England
5,New Hampshire ...........,72395,73810,75038,77201,78329,79155,0.4,0.4,0.4,0.4,0.4,0.4,75833,76737,0.4,0.4,74893.67,77855.5,New Hampshire,New England
6,Rhode Island ..............,54858,55401,55884,57099,58017,58527,0.3,0.3,0.3,0.3,0.3,0.3,56457,56087,0.3,0.3,55914.0,57432.5,Rhode Island,New England
7,Vermont .....................,29815,30135,30660,30859,31211,31483,0.2,0.2,0.2,0.2,0.2,0.2,30591,30814,0.2,0.2,30462.0,31091.75,Vermont,New England
8,Mideast .......................,3223326,3270682,3302756,3358622,3381370,3408478,18.2,,18.3,,18.2,18.2,3301626,3329683,18.2,18.3,3291688.0,3369538.25,Mideast,
9,Delaware ....................,68216,68898,69086,69747,71080,71769,0.4,0.4,0.4,0.4,0.4,0.4,69256,68952,0.4,0.4,69080.0,70387.0,Delaware,Mideast


In [311]:
states = ["Alaska", "Alabama", "Arkansas", "Arizona", "California", "Colorado", "Connecticut", "District of Columbia",
          "Delaware", "Florida", "Georgia", "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana", "Kansas", "Kentucky", 
          "Louisiana", "Massachusetts", "Maryland", "Maine", "Michigan", "Minnesota", "Missouri", "Mississippi", "Montana", 
          "North Carolina", "North Dakota", "Nebraska", "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York", 
          "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", 
          "Texas", "Utah", "Virginia", "Vermont", "Washington", "Wisconsin", "West Virginia", "Wyoming"]
        
        # empty DataFrame to hold the new columns
GDPdf = pd.DataFrame()
        # count variable to increase the index by one after each row is added
count = 0
        
        # for loop to iterate through the years specified by the instance
for year in range(GDP2015_2016.yearStart, GDP2015_2016.yearEnd + 1):
            # for each state in that year, add in the following columns
    for state in states:
        GDPdf.loc[count, 'year'] = str(year)
        GDPdf.loc[count, 'state'] = state
        GDPdf.loc[count, 'current dollars'] = GDP2015_2016.dollars
        GDPdf.loc[count, 'GDP'] = GDP2015_2016.df.loc[GDP2015_2016.df['Cleaned Area'] == state, str(year)].item()
        GDPdf.loc[count, 'GDP_area'] = GDP2015_2016.df.loc[GDP2015_2016.df['Cleaned Area'] == state, 'Geo Loc'].item()
        count += 1

GDP2015_2016.df = GDPdf
GDP2015_2016.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2015,Alaska,2016,53180.0,Far West
1,2015,Alabama,2016,201033.7,Southeast
2,2015,Arkansas,2016,119214.7,Southeast
3,2015,Arizona,2016,292866.7,Southwest
4,2015,California,2016,2507471.0,Far West
5,2015,Colorado,2016,314441.3,Rocky Mountain
6,2015,Connecticut,2016,258198.0,New England
7,2015,District of Columbia,2016,121458.3,Mideast
8,2015,Delaware,2016,69080.0,Mideast
9,2015,Florida,2016,889744.7,Southeast


# 2017 - 2018 Quarterly Data

In [312]:
GDP17_18 = tabula.read_pdf('GDP2017-2018_quarterly.pdf', pages = 8, stream = True)
GDP17_18

[              Unnamed: 0  Unnamed: 1  Unnamed: 2  Unnamed: 3  Unnamed: 4  \
 0                    NaN         NaN         NaN         NaN         NaN   
 1                    NaN         NaN         NaN      2017.0         NaN   
 2                    NaN          Q1          Q2         NaN          Q3   
 3          United States  19,162,550  19,359,123         NaN  19,588,074   
 4            New England   1,025,381   1,036,158         NaN   1,049,408   
 5            Connecticut     261,977     264,949         NaN     267,504   
 6                  Maine      61,009      61,230         NaN      62,104   
 7          Massachusetts     531,748     537,860         NaN     545,137   
 8          New Hampshire      80,163      80,427         NaN      82,235   
 9           Rhode Island      58,159      59,201         NaN      59,692   
 10               Vermont      32,325      32,490         NaN      32,737   
 11               Mideast   3,506,591   3,529,933         NaN   3,575,801   

In [313]:
GDP17_18 = pd.concat(GDP17_18, ignore_index = True)
GDP17_18

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Millions of dollars,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,,,,,,Percent of the U.S.Seasonally adjusted at annu...,,,,
1,,,,2017.0,,2018 2017,,2018.0,,
2,,Q1,Q2,,Q3,Q4 Q1 Q2 Q3 Q4 Q1 Q2 Q3 Q4 Q1,Q2,,Q3,Q4
3,United States,19162550,19359123,,19588074,"19,831,829 20,041,047 20,411,924 20,658,204 20...",100.0,,100.0,100.0
4,New England,1025381,1036158,,1049408,"1,054,099 1,068,841 1,078,395 1,091,558 1,102,...",5.3,,5.3,5.3
5,Connecticut,261977,264949,,267504,"267,711 270,268 269,863 276,934 279,653 1.4 1....",1.3,,1.3,1.3
6,Maine,61009,61230,,62104,"62,538 63,173 64,082 64,798 65,349 0.3 0.3 0.3...",0.3,,0.3,0.3
7,Massachusetts,531748,537860,,545137,"549,051 558,137 565,224 570,024 575,635 2.8 2....",2.8,,2.8,2.8
8,New Hampshire,80163,80427,,82235,"81,891 83,566 84,035 85,201 86,046 0.4 0.4 0.4...",0.4,,0.4,0.4
9,Rhode Island,58159,59201,,59692,"59,966 60,503 61,432 60,807 61,341 0.3 0.3 0.3...",0.3,,0.3,0.3


In [314]:
GDP17_18 = GDP17_18.drop(GDP17_18.index[0:3]).reset_index(drop = True)
GDP17_18

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Millions of dollars,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,United States,19162550,19359123,,19588074,"19,831,829 20,041,047 20,411,924 20,658,204 20...",100.0,,100.0,100.0
1,New England,1025381,1036158,,1049408,"1,054,099 1,068,841 1,078,395 1,091,558 1,102,...",5.3,,5.3,5.3
2,Connecticut,261977,264949,,267504,"267,711 270,268 269,863 276,934 279,653 1.4 1....",1.3,,1.3,1.3
3,Maine,61009,61230,,62104,"62,538 63,173 64,082 64,798 65,349 0.3 0.3 0.3...",0.3,,0.3,0.3
4,Massachusetts,531748,537860,,545137,"549,051 558,137 565,224 570,024 575,635 2.8 2....",2.8,,2.8,2.8
5,New Hampshire,80163,80427,,82235,"81,891 83,566 84,035 85,201 86,046 0.4 0.4 0.4...",0.4,,0.4,0.4
6,Rhode Island,58159,59201,,59692,"59,966 60,503 61,432 60,807 61,341 0.3 0.3 0.3...",0.3,,0.3,0.3
7,Vermont,32325,32490,,32737,"32,942 33,193 33,760 33,794 34,154 0.2 0.2 0.2...",0.2,,0.2,0.2
8,Mideast,3506591,3529933,,3575801,"3,610,059 3,641,801 3,706,637 3,746,976 3,776,...",18.2,,18.1,18.1
9,Delaware,71844,71164,,72953,"72,708 73,168 74,104 76,084 76,537 0.4 0.4 0.4...",0.4,,0.4,0.4


In [315]:
GDP17_18 = GDP17_18.drop(['Unnamed: 3', 'Unnamed: 6'], axis = 1)

for i in GDP17_18['Millions of dollars'].index:
    splitList = GDP17_18.loc[i, 'Millions of dollars'].split(" ")
    GDP17_18.loc[i, '2017 Q4'] = splitList[0]
    GDP17_18.loc[i, '2018 Q1'] = splitList[1]
    GDP17_18.loc[i, '2018 Q2'] = splitList[2]
    GDP17_18.loc[i, '2018 Q3'] = splitList[3]
    GDP17_18.loc[i, '2018 Q4'] = splitList[4]
    GDP17_18.loc[i, '2017 Q1 %'] = splitList[5]
    GDP17_18.loc[i, '2017 Q2 %'] = splitList[6]
    GDP17_18.loc[i, '2017 Q3 %'] = splitList[7]
    GDP17_18.loc[i, '2017 Q4 %'] = splitList[8]
    GDP17_18.loc[i, '2018 Q1 %'] = splitList[9]

GDP17_18 = GDP17_18.drop('Millions of dollars', axis = 1)
    
GDP17_18

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 4,Unnamed: 5,Unnamed: 7,Unnamed: 8,2017 Q4,2018 Q1,2018 Q2,2018 Q3,2018 Q4,2017 Q1 %,2017 Q2 %,2017 Q3 %,2017 Q4 %,2018 Q1 %
0,United States,19162550,19359123,19588074,100.0,100.0,100.0,19831829,20041047,20411924,20658204,20865140,100.0,100.0,100.0,100.0,100.0
1,New England,1025381,1036158,1049408,5.3,5.3,5.3,1054099,1068841,1078395,1091558,1102178,5.4,5.4,5.4,5.3,5.3
2,Connecticut,261977,264949,267504,1.3,1.3,1.3,267711,270268,269863,276934,279653,1.4,1.4,1.4,1.3,1.3
3,Maine,61009,61230,62104,0.3,0.3,0.3,62538,63173,64082,64798,65349,0.3,0.3,0.3,0.3,0.3
4,Massachusetts,531748,537860,545137,2.8,2.8,2.8,549051,558137,565224,570024,575635,2.8,2.8,2.8,2.8,2.8
5,New Hampshire,80163,80427,82235,0.4,0.4,0.4,81891,83566,84035,85201,86046,0.4,0.4,0.4,0.4,0.4
6,Rhode Island,58159,59201,59692,0.3,0.3,0.3,59966,60503,61432,60807,61341,0.3,0.3,0.3,0.3,0.3
7,Vermont,32325,32490,32737,0.2,0.2,0.2,32942,33193,33760,33794,34154,0.2,0.2,0.2,0.2,0.2
8,Mideast,3506591,3529933,3575801,18.2,18.1,18.1,3610059,3641801,3706637,3746976,3776242,18.3,18.2,18.3,18.2,18.2
9,Delaware,71844,71164,72953,0.4,0.4,0.4,72708,73168,74104,76084,76537,0.4,0.4,0.4,0.4,0.4


In [316]:
GDP17_18.columns = ['Area', '2017 Q1', '2017 Q2', '2017 Q3', '2018 Q2 %',
                 '2018 Q3 %', '2018 Q4 %','2017 Q4', '2018 Q1', '2018 Q2', '2018 Q3',
                 '2018 Q4', '2017 Q1 %', '2017 Q2 %', '2017 Q3 %', '2017 Q4 %', '2018 Q1 %']
GDP17_18

Unnamed: 0,Area,2017 Q1,2017 Q2,2017 Q3,2018 Q2 %,2018 Q3 %,2018 Q4 %,2017 Q4,2018 Q1,2018 Q2,2018 Q3,2018 Q4,2017 Q1 %,2017 Q2 %,2017 Q3 %,2017 Q4 %,2018 Q1 %
0,United States,19162550,19359123,19588074,100.0,100.0,100.0,19831829,20041047,20411924,20658204,20865140,100.0,100.0,100.0,100.0,100.0
1,New England,1025381,1036158,1049408,5.3,5.3,5.3,1054099,1068841,1078395,1091558,1102178,5.4,5.4,5.4,5.3,5.3
2,Connecticut,261977,264949,267504,1.3,1.3,1.3,267711,270268,269863,276934,279653,1.4,1.4,1.4,1.3,1.3
3,Maine,61009,61230,62104,0.3,0.3,0.3,62538,63173,64082,64798,65349,0.3,0.3,0.3,0.3,0.3
4,Massachusetts,531748,537860,545137,2.8,2.8,2.8,549051,558137,565224,570024,575635,2.8,2.8,2.8,2.8,2.8
5,New Hampshire,80163,80427,82235,0.4,0.4,0.4,81891,83566,84035,85201,86046,0.4,0.4,0.4,0.4,0.4
6,Rhode Island,58159,59201,59692,0.3,0.3,0.3,59966,60503,61432,60807,61341,0.3,0.3,0.3,0.3,0.3
7,Vermont,32325,32490,32737,0.2,0.2,0.2,32942,33193,33760,33794,34154,0.2,0.2,0.2,0.2,0.2
8,Mideast,3506591,3529933,3575801,18.2,18.1,18.1,3610059,3641801,3706637,3746976,3776242,18.3,18.2,18.3,18.2,18.2
9,Delaware,71844,71164,72953,0.4,0.4,0.4,72708,73168,74104,76084,76537,0.4,0.4,0.4,0.4,0.4


In [317]:
for column in GDP17_18.columns:
    if column == 'Area':
        pass
    else:
        for i in GDP17_18[column].index:
            if isinstance(GDP17_18.loc[i, column], float):
                pass
            else:
                GDP17_18.loc[i, column] = GDP17_18.loc[i, column].replace(',','')
GDP17_18 = GDP17_18.apply(pd.to_numeric, errors = 'ignore')
GDP17_18

Unnamed: 0,Area,2017 Q1,2017 Q2,2017 Q3,2018 Q2 %,2018 Q3 %,2018 Q4 %,2017 Q4,2018 Q1,2018 Q2,2018 Q3,2018 Q4,2017 Q1 %,2017 Q2 %,2017 Q3 %,2017 Q4 %,2018 Q1 %
0,United States,19162550,19359123,19588074,100.0,100.0,100.0,19831829,20041047,20411924,20658204,20865140,100.0,100.0,100.0,100.0,100.0
1,New England,1025381,1036158,1049408,5.3,5.3,5.3,1054099,1068841,1078395,1091558,1102178,5.4,5.4,5.4,5.3,5.3
2,Connecticut,261977,264949,267504,1.3,1.3,1.3,267711,270268,269863,276934,279653,1.4,1.4,1.4,1.3,1.3
3,Maine,61009,61230,62104,0.3,0.3,0.3,62538,63173,64082,64798,65349,0.3,0.3,0.3,0.3,0.3
4,Massachusetts,531748,537860,545137,2.8,2.8,2.8,549051,558137,565224,570024,575635,2.8,2.8,2.8,2.8,2.8
5,New Hampshire,80163,80427,82235,0.4,0.4,0.4,81891,83566,84035,85201,86046,0.4,0.4,0.4,0.4,0.4
6,Rhode Island,58159,59201,59692,0.3,0.3,0.3,59966,60503,61432,60807,61341,0.3,0.3,0.3,0.3,0.3
7,Vermont,32325,32490,32737,0.2,0.2,0.2,32942,33193,33760,33794,34154,0.2,0.2,0.2,0.2,0.2
8,Mideast,3506591,3529933,3575801,18.2,18.1,18.1,3610059,3641801,3706637,3746976,3776242,18.3,18.2,18.3,18.2,18.2
9,Delaware,71844,71164,72953,0.4,0.4,0.4,72708,73168,74104,76084,76537,0.4,0.4,0.4,0.4,0.4


In [318]:
gdp2017 = ['2017 Q1', '2017 Q2', '2017 Q3', '2017 Q4']
gdp2018 = ['2018 Q1', '2018 Q2', '2018 Q3', '2018 Q4']

filter_2017 = GDP17_18.filter(items = gdp2017)
filter_2018 = GDP17_18.filter(items = gdp2018)

GDP17_18['2017'] = filter_2017.mean(axis = 1)
GDP17_18['2018'] = filter_2018.mean(axis = 1)

GDP17_18

Unnamed: 0,Area,2017 Q1,2017 Q2,2017 Q3,2018 Q2 %,2018 Q3 %,2018 Q4 %,2017 Q4,2018 Q1,2018 Q2,2018 Q3,2018 Q4,2017 Q1 %,2017 Q2 %,2017 Q3 %,2017 Q4 %,2018 Q1 %,2017,2018
0,United States,19162550,19359123,19588074,100.0,100.0,100.0,19831829,20041047,20411924,20658204,20865140,100.0,100.0,100.0,100.0,100.0,19485394.0,20494078.75
1,New England,1025381,1036158,1049408,5.3,5.3,5.3,1054099,1068841,1078395,1091558,1102178,5.4,5.4,5.4,5.3,5.3,1041261.5,1085243.0
2,Connecticut,261977,264949,267504,1.3,1.3,1.3,267711,270268,269863,276934,279653,1.4,1.4,1.4,1.3,1.3,265535.25,274179.5
3,Maine,61009,61230,62104,0.3,0.3,0.3,62538,63173,64082,64798,65349,0.3,0.3,0.3,0.3,0.3,61720.25,64350.5
4,Massachusetts,531748,537860,545137,2.8,2.8,2.8,549051,558137,565224,570024,575635,2.8,2.8,2.8,2.8,2.8,540949.0,567255.0
5,New Hampshire,80163,80427,82235,0.4,0.4,0.4,81891,83566,84035,85201,86046,0.4,0.4,0.4,0.4,0.4,81179.0,84712.0
6,Rhode Island,58159,59201,59692,0.3,0.3,0.3,59966,60503,61432,60807,61341,0.3,0.3,0.3,0.3,0.3,59254.5,61020.75
7,Vermont,32325,32490,32737,0.2,0.2,0.2,32942,33193,33760,33794,34154,0.2,0.2,0.2,0.2,0.2,32623.5,33725.25
8,Mideast,3506591,3529933,3575801,18.2,18.1,18.1,3610059,3641801,3706637,3746976,3776242,18.3,18.2,18.3,18.2,18.2,3555596.0,3717914.0
9,Delaware,71844,71164,72953,0.4,0.4,0.4,72708,73168,74104,76084,76537,0.4,0.4,0.4,0.4,0.4,72167.25,74973.25


In [319]:
GDP2017_2018 = QuarterlyGDP('GDP2017-2018_quarterly.pdf', 8, 2017, 2018, '2018')

In [320]:
GDP2017_2018.df = GDP17_18

In [321]:
GDP2017_2018.df

Unnamed: 0,Area,2017 Q1,2017 Q2,2017 Q3,2018 Q2 %,2018 Q3 %,2018 Q4 %,2017 Q4,2018 Q1,2018 Q2,2018 Q3,2018 Q4,2017 Q1 %,2017 Q2 %,2017 Q3 %,2017 Q4 %,2018 Q1 %,2017,2018
0,United States,19162550,19359123,19588074,100.0,100.0,100.0,19831829,20041047,20411924,20658204,20865140,100.0,100.0,100.0,100.0,100.0,19485394.0,20494078.75
1,New England,1025381,1036158,1049408,5.3,5.3,5.3,1054099,1068841,1078395,1091558,1102178,5.4,5.4,5.4,5.3,5.3,1041261.5,1085243.0
2,Connecticut,261977,264949,267504,1.3,1.3,1.3,267711,270268,269863,276934,279653,1.4,1.4,1.4,1.3,1.3,265535.25,274179.5
3,Maine,61009,61230,62104,0.3,0.3,0.3,62538,63173,64082,64798,65349,0.3,0.3,0.3,0.3,0.3,61720.25,64350.5
4,Massachusetts,531748,537860,545137,2.8,2.8,2.8,549051,558137,565224,570024,575635,2.8,2.8,2.8,2.8,2.8,540949.0,567255.0
5,New Hampshire,80163,80427,82235,0.4,0.4,0.4,81891,83566,84035,85201,86046,0.4,0.4,0.4,0.4,0.4,81179.0,84712.0
6,Rhode Island,58159,59201,59692,0.3,0.3,0.3,59966,60503,61432,60807,61341,0.3,0.3,0.3,0.3,0.3,59254.5,61020.75
7,Vermont,32325,32490,32737,0.2,0.2,0.2,32942,33193,33760,33794,34154,0.2,0.2,0.2,0.2,0.2,32623.5,33725.25
8,Mideast,3506591,3529933,3575801,18.2,18.1,18.1,3610059,3641801,3706637,3746976,3776242,18.3,18.2,18.3,18.2,18.2,3555596.0,3717914.0
9,Delaware,71844,71164,72953,0.4,0.4,0.4,72708,73168,74104,76084,76537,0.4,0.4,0.4,0.4,0.4,72167.25,74973.25


In [322]:
GDP2017_2018.cleanStates()

Unnamed: 0,Area,2017 Q1,2017 Q2,2017 Q3,2018 Q2 %,2018 Q3 %,2018 Q4 %,2017 Q4,2018 Q1,2018 Q2,2018 Q3,2018 Q4,2017 Q1 %,2017 Q2 %,2017 Q3 %,2017 Q4 %,2018 Q1 %,2017,2018,Cleaned Area
0,United States,19162550,19359123,19588074,100.0,100.0,100.0,19831829,20041047,20411924,20658204,20865140,100.0,100.0,100.0,100.0,100.0,19485394.0,20494078.75,United States
1,New England,1025381,1036158,1049408,5.3,5.3,5.3,1054099,1068841,1078395,1091558,1102178,5.4,5.4,5.4,5.3,5.3,1041261.5,1085243.0,New England
2,Connecticut,261977,264949,267504,1.3,1.3,1.3,267711,270268,269863,276934,279653,1.4,1.4,1.4,1.3,1.3,265535.25,274179.5,Connecticut
3,Maine,61009,61230,62104,0.3,0.3,0.3,62538,63173,64082,64798,65349,0.3,0.3,0.3,0.3,0.3,61720.25,64350.5,Maine
4,Massachusetts,531748,537860,545137,2.8,2.8,2.8,549051,558137,565224,570024,575635,2.8,2.8,2.8,2.8,2.8,540949.0,567255.0,Massachusetts
5,New Hampshire,80163,80427,82235,0.4,0.4,0.4,81891,83566,84035,85201,86046,0.4,0.4,0.4,0.4,0.4,81179.0,84712.0,New Hampshire
6,Rhode Island,58159,59201,59692,0.3,0.3,0.3,59966,60503,61432,60807,61341,0.3,0.3,0.3,0.3,0.3,59254.5,61020.75,Rhode Island
7,Vermont,32325,32490,32737,0.2,0.2,0.2,32942,33193,33760,33794,34154,0.2,0.2,0.2,0.2,0.2,32623.5,33725.25,Vermont
8,Mideast,3506591,3529933,3575801,18.2,18.1,18.1,3610059,3641801,3706637,3746976,3776242,18.3,18.2,18.3,18.2,18.2,3555596.0,3717914.0,Mideast
9,Delaware,71844,71164,72953,0.4,0.4,0.4,72708,73168,74104,76084,76537,0.4,0.4,0.4,0.4,0.4,72167.25,74973.25,Delaware


In [323]:
GDP2017_2018.addGeoLocColumn()

Unnamed: 0,Area,2017 Q1,2017 Q2,2017 Q3,2018 Q2 %,2018 Q3 %,2018 Q4 %,2017 Q4,2018 Q1,2018 Q2,2018 Q3,2018 Q4,2017 Q1 %,2017 Q2 %,2017 Q3 %,2017 Q4 %,2018 Q1 %,2017,2018,Cleaned Area,Geo Loc
0,United States,19162550,19359123,19588074,100.0,100.0,100.0,19831829,20041047,20411924,20658204,20865140,100.0,100.0,100.0,100.0,100.0,19485394.0,20494078.75,United States,
1,New England,1025381,1036158,1049408,5.3,5.3,5.3,1054099,1068841,1078395,1091558,1102178,5.4,5.4,5.4,5.3,5.3,1041261.5,1085243.0,New England,
2,Connecticut,261977,264949,267504,1.3,1.3,1.3,267711,270268,269863,276934,279653,1.4,1.4,1.4,1.3,1.3,265535.25,274179.5,Connecticut,New England
3,Maine,61009,61230,62104,0.3,0.3,0.3,62538,63173,64082,64798,65349,0.3,0.3,0.3,0.3,0.3,61720.25,64350.5,Maine,New England
4,Massachusetts,531748,537860,545137,2.8,2.8,2.8,549051,558137,565224,570024,575635,2.8,2.8,2.8,2.8,2.8,540949.0,567255.0,Massachusetts,New England
5,New Hampshire,80163,80427,82235,0.4,0.4,0.4,81891,83566,84035,85201,86046,0.4,0.4,0.4,0.4,0.4,81179.0,84712.0,New Hampshire,New England
6,Rhode Island,58159,59201,59692,0.3,0.3,0.3,59966,60503,61432,60807,61341,0.3,0.3,0.3,0.3,0.3,59254.5,61020.75,Rhode Island,New England
7,Vermont,32325,32490,32737,0.2,0.2,0.2,32942,33193,33760,33794,34154,0.2,0.2,0.2,0.2,0.2,32623.5,33725.25,Vermont,New England
8,Mideast,3506591,3529933,3575801,18.2,18.1,18.1,3610059,3641801,3706637,3746976,3776242,18.3,18.2,18.3,18.2,18.2,3555596.0,3717914.0,Mideast,
9,Delaware,71844,71164,72953,0.4,0.4,0.4,72708,73168,74104,76084,76537,0.4,0.4,0.4,0.4,0.4,72167.25,74973.25,Delaware,Mideast


In [324]:
states = ["Alaska", "Alabama", "Arkansas", "Arizona", "California", "Colorado", "Connecticut", "District of Columbia",
          "Delaware", "Florida", "Georgia", "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana", "Kansas", "Kentucky", 
          "Louisiana", "Massachusetts", "Maryland", "Maine", "Michigan", "Minnesota", "Missouri", "Mississippi", "Montana", 
          "North Carolina", "North Dakota", "Nebraska", "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York", 
          "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", 
          "Texas", "Utah", "Virginia", "Vermont", "Washington", "Wisconsin", "West Virginia", "Wyoming"]
        
        # empty DataFrame to hold the new columns
GDPdf = pd.DataFrame()
        # count variable to increase the index by one after each row is added
count = 0
        
        # for loop to iterate through the years specified by the instance
for year in range(GDP2017_2018.yearStart, GDP2017_2018.yearEnd + 1):
            # for each state in that year, add in the following columns
    for state in states:
        GDPdf.loc[count, 'year'] = str(year)
        GDPdf.loc[count, 'state'] = state
        GDPdf.loc[count, 'current dollars'] = GDP2017_2018.dollars
        GDPdf.loc[count, 'GDP'] = GDP2017_2018.df.loc[GDP2017_2018.df['Cleaned Area'] == state, str(year)].item()
        GDPdf.loc[count, 'GDP_area'] = GDP2017_2018.df.loc[GDP2017_2018.df['Cleaned Area'] == state, 'Geo Loc'].item()
        count += 1

GDP2017_2018.df = GDPdf
GDP2017_2018.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2017,Alaska,2018,51686.75,Far West
1,2017,Alabama,2018,211665.75,Southeast
2,2017,Arkansas,2018,123713.75,Southeast
3,2017,Arizona,2018,326539.5,Southwest
4,2017,California,2018,2809922.5,Far West
5,2017,Colorado,2018,347195.75,Rocky Mountain
6,2017,Connecticut,2018,265535.25,New England
7,2017,District of Columbia,2018,134901.25,Mideast
8,2017,Delaware,2018,72167.25,Mideast
9,2017,Florida,2018,979464.5,Southeast


# 2019 Quarterly Data

In [325]:
GDP19_20 = tabula.read_pdf('GDP2019-2020_quarterly.pdf', pages = 7, stream = True)
GDP19_20

[              Unnamed: 0  Unnamed: 1  \
 0                    NaN         NaN   
 1                    NaN         NaN   
 2                    NaN          Q1   
 3          United States  21,098,827   
 4            New England   1,118,703   
 5            Connecticut     281,659   
 6                  Maine      66,590   
 7          Massachusetts     586,347   
 8          New Hampshire      87,244   
 9           Rhode Island      62,578   
 10               Vermont      34,284   
 11               Mideast   3,786,089   
 12              Delaware      74,270   
 13  District of Columbia     144,075   
 14              Maryland     422,247   
 15            New Jersey     636,068   
 16              New York   1,707,520   
 17          Pennsylvania     801,909   
 18           Great Lakes   2,823,073   
 19              Illinois     884,430   
 20               Indiana     372,061   
 21              Michigan     534,427   
 22                  Ohio     689,542   
 23             

In [326]:
GDP19_20 = pd.concat(GDP19_20, ignore_index = True)
GDP19_20

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Millions of dollars,Unnamed: 2,Unnamed: 3
0,,,Percent of the U.S.Seasonally adjusted at annu...,,
1,,,2019 2020 2019,,2020
2,,Q1,Q2 Q3 Q4 Q1 Q1 Q2 Q3,Q4,Q1
3,United States,21098827,"21,340,267 21,542,540 21,729,124 21,539,689 10...",100.0,100.0
4,New England,1118703,"1,130,084 1,142,586 1,151,182 1,143,978 5.3 5....",5.3,5.3
5,Connecticut,281659,"284,357 287,560 288,985 287,711 1.3 1.3 1.3",1.3,1.3
6,Maine,66590,"67,138 67,905 68,441 67,793 0.3 0.3 0.3",0.3,0.3
7,Massachusetts,586347,"592,588 599,092 604,208 600,554 2.8 2.8 2.8",2.8,2.8
8,New Hampshire,87244,"88,147 89,152 89,836 89,128 0.4 0.4 0.4",0.4,0.4
9,Rhode Island,62578,"63,242 63,903 64,441 63,865 0.3 0.3 0.3",0.3,0.3


In [327]:
GDP19_20 = GDP19_20.drop(GDP19_20.index[0:3]).reset_index(drop = True)
GDP19_20

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Millions of dollars,Unnamed: 2,Unnamed: 3
0,United States,21098827,"21,340,267 21,542,540 21,729,124 21,539,689 10...",100.0,100.0
1,New England,1118703,"1,130,084 1,142,586 1,151,182 1,143,978 5.3 5....",5.3,5.3
2,Connecticut,281659,"284,357 287,560 288,985 287,711 1.3 1.3 1.3",1.3,1.3
3,Maine,66590,"67,138 67,905 68,441 67,793 0.3 0.3 0.3",0.3,0.3
4,Massachusetts,586347,"592,588 599,092 604,208 600,554 2.8 2.8 2.8",2.8,2.8
5,New Hampshire,87244,"88,147 89,152 89,836 89,128 0.4 0.4 0.4",0.4,0.4
6,Rhode Island,62578,"63,242 63,903 64,441 63,865 0.3 0.3 0.3",0.3,0.3
7,Vermont,34284,"34,612 34,973 35,271 34,927 0.2 0.2 0.2",0.2,0.2
8,Mideast,3786089,"3,827,030 3,860,095 3,887,642 3,847,859 17.9 1...",17.9,17.9
9,Delaware,74270,"75,216 75,765 76,410 76,086 0.4 0.4 0.4",0.4,0.4


In [328]:
for i in GDP19_20['Millions of dollars'].index:
    splitList = GDP19_20.loc[i, 'Millions of dollars'].split(" ")
    GDP19_20.loc[i, '2019 Q2'] = splitList[0]
    GDP19_20.loc[i, '2019 Q3'] = splitList[1]
    GDP19_20.loc[i, '2019 Q4'] = splitList[2]
    GDP19_20.loc[i, '2020 Q1'] = splitList[3]
    GDP19_20.loc[i, '2019 Q1 %'] = splitList[4]
    GDP19_20.loc[i, '2019 Q2 %'] = splitList[5]
    GDP19_20.loc[i, '2019 Q3 %'] = splitList[6]

GDP19_20 = GDP19_20.drop('Millions of dollars', axis = 1)
    
GDP19_20

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,2019 Q2,2019 Q3,2019 Q4,2020 Q1,2019 Q1 %,2019 Q2 %,2019 Q3 %
0,United States,21098827,100.0,100.0,21340267,21542540,21729124,21539689,100.0,100.0,100.0
1,New England,1118703,5.3,5.3,1130084,1142586,1151182,1143978,5.3,5.3,5.3
2,Connecticut,281659,1.3,1.3,284357,287560,288985,287711,1.3,1.3,1.3
3,Maine,66590,0.3,0.3,67138,67905,68441,67793,0.3,0.3,0.3
4,Massachusetts,586347,2.8,2.8,592588,599092,604208,600554,2.8,2.8,2.8
5,New Hampshire,87244,0.4,0.4,88147,89152,89836,89128,0.4,0.4,0.4
6,Rhode Island,62578,0.3,0.3,63242,63903,64441,63865,0.3,0.3,0.3
7,Vermont,34284,0.2,0.2,34612,34973,35271,34927,0.2,0.2,0.2
8,Mideast,3786089,17.9,17.9,3827030,3860095,3887642,3847859,17.9,17.9,17.9
9,Delaware,74270,0.4,0.4,75216,75765,76410,76086,0.4,0.4,0.4


In [329]:
GDP19_20.columns = ['Area', '2019 Q1', '2019 Q4%', '2020 Q1%', '2019 Q2',
                 '2019 Q3', '2019 Q4','2020 Q1', '2019 Q1 %', '2019 Q2 %', '2019 Q3 %']
GDP19_20

Unnamed: 0,Area,2019 Q1,2019 Q4%,2020 Q1%,2019 Q2,2019 Q3,2019 Q4,2020 Q1,2019 Q1 %,2019 Q2 %,2019 Q3 %
0,United States,21098827,100.0,100.0,21340267,21542540,21729124,21539689,100.0,100.0,100.0
1,New England,1118703,5.3,5.3,1130084,1142586,1151182,1143978,5.3,5.3,5.3
2,Connecticut,281659,1.3,1.3,284357,287560,288985,287711,1.3,1.3,1.3
3,Maine,66590,0.3,0.3,67138,67905,68441,67793,0.3,0.3,0.3
4,Massachusetts,586347,2.8,2.8,592588,599092,604208,600554,2.8,2.8,2.8
5,New Hampshire,87244,0.4,0.4,88147,89152,89836,89128,0.4,0.4,0.4
6,Rhode Island,62578,0.3,0.3,63242,63903,64441,63865,0.3,0.3,0.3
7,Vermont,34284,0.2,0.2,34612,34973,35271,34927,0.2,0.2,0.2
8,Mideast,3786089,17.9,17.9,3827030,3860095,3887642,3847859,17.9,17.9,17.9
9,Delaware,74270,0.4,0.4,75216,75765,76410,76086,0.4,0.4,0.4


In [330]:
for column in GDP19_20.columns:
    if column == 'Area':
        pass
    else:
        for i in GDP19_20[column].index:
            if isinstance(GDP19_20.loc[i, column], float):
                pass
            else:
                GDP19_20.loc[i, column] = GDP19_20.loc[i, column].replace(',','')
GDP19_20 = GDP19_20.apply(pd.to_numeric, errors = 'ignore')
GDP19_20

Unnamed: 0,Area,2019 Q1,2019 Q4%,2020 Q1%,2019 Q2,2019 Q3,2019 Q4,2020 Q1,2019 Q1 %,2019 Q2 %,2019 Q3 %
0,United States,21098827,100.0,100.0,21340267,21542540,21729124,21539689,100.0,100.0,100.0
1,New England,1118703,5.3,5.3,1130084,1142586,1151182,1143978,5.3,5.3,5.3
2,Connecticut,281659,1.3,1.3,284357,287560,288985,287711,1.3,1.3,1.3
3,Maine,66590,0.3,0.3,67138,67905,68441,67793,0.3,0.3,0.3
4,Massachusetts,586347,2.8,2.8,592588,599092,604208,600554,2.8,2.8,2.8
5,New Hampshire,87244,0.4,0.4,88147,89152,89836,89128,0.4,0.4,0.4
6,Rhode Island,62578,0.3,0.3,63242,63903,64441,63865,0.3,0.3,0.3
7,Vermont,34284,0.2,0.2,34612,34973,35271,34927,0.2,0.2,0.2
8,Mideast,3786089,17.9,17.9,3827030,3860095,3887642,3847859,17.9,17.9,17.9
9,Delaware,74270,0.4,0.4,75216,75765,76410,76086,0.4,0.4,0.4


In [331]:
gdp2019 = ['2019 Q1', '2019 Q2', '2019 Q3', '2019 Q4']

filter_2019 = GDP19_20.filter(items = gdp2019)

GDP19_20['2019'] = filter_2019.mean(axis = 1)

GDP19_20

Unnamed: 0,Area,2019 Q1,2019 Q4%,2020 Q1%,2019 Q2,2019 Q3,2019 Q4,2020 Q1,2019 Q1 %,2019 Q2 %,2019 Q3 %,2019
0,United States,21098827,100.0,100.0,21340267,21542540,21729124,21539689,100.0,100.0,100.0,21427689.5
1,New England,1118703,5.3,5.3,1130084,1142586,1151182,1143978,5.3,5.3,5.3,1135638.75
2,Connecticut,281659,1.3,1.3,284357,287560,288985,287711,1.3,1.3,1.3,285640.25
3,Maine,66590,0.3,0.3,67138,67905,68441,67793,0.3,0.3,0.3,67518.5
4,Massachusetts,586347,2.8,2.8,592588,599092,604208,600554,2.8,2.8,2.8,595558.75
5,New Hampshire,87244,0.4,0.4,88147,89152,89836,89128,0.4,0.4,0.4,88594.75
6,Rhode Island,62578,0.3,0.3,63242,63903,64441,63865,0.3,0.3,0.3,63541.0
7,Vermont,34284,0.2,0.2,34612,34973,35271,34927,0.2,0.2,0.2,34785.0
8,Mideast,3786089,17.9,17.9,3827030,3860095,3887642,3847859,17.9,17.9,17.9,3840214.0
9,Delaware,74270,0.4,0.4,75216,75765,76410,76086,0.4,0.4,0.4,75415.25


In [332]:
GDP2019_2020 = QuarterlyGDP('GDP2019-2020_quarterly.pdf', 7, 2019, 2019, '2019')

In [333]:
GDP2019_2020.df = GDP19_20

In [334]:
GDP2019_2020.df

Unnamed: 0,Area,2019 Q1,2019 Q4%,2020 Q1%,2019 Q2,2019 Q3,2019 Q4,2020 Q1,2019 Q1 %,2019 Q2 %,2019 Q3 %,2019
0,United States,21098827,100.0,100.0,21340267,21542540,21729124,21539689,100.0,100.0,100.0,21427689.5
1,New England,1118703,5.3,5.3,1130084,1142586,1151182,1143978,5.3,5.3,5.3,1135638.75
2,Connecticut,281659,1.3,1.3,284357,287560,288985,287711,1.3,1.3,1.3,285640.25
3,Maine,66590,0.3,0.3,67138,67905,68441,67793,0.3,0.3,0.3,67518.5
4,Massachusetts,586347,2.8,2.8,592588,599092,604208,600554,2.8,2.8,2.8,595558.75
5,New Hampshire,87244,0.4,0.4,88147,89152,89836,89128,0.4,0.4,0.4,88594.75
6,Rhode Island,62578,0.3,0.3,63242,63903,64441,63865,0.3,0.3,0.3,63541.0
7,Vermont,34284,0.2,0.2,34612,34973,35271,34927,0.2,0.2,0.2,34785.0
8,Mideast,3786089,17.9,17.9,3827030,3860095,3887642,3847859,17.9,17.9,17.9,3840214.0
9,Delaware,74270,0.4,0.4,75216,75765,76410,76086,0.4,0.4,0.4,75415.25


In [335]:
GDP2019_2020.cleanStates()

Unnamed: 0,Area,2019 Q1,2019 Q4%,2020 Q1%,2019 Q2,2019 Q3,2019 Q4,2020 Q1,2019 Q1 %,2019 Q2 %,2019 Q3 %,2019,Cleaned Area
0,United States,21098827,100.0,100.0,21340267,21542540,21729124,21539689,100.0,100.0,100.0,21427689.5,United States
1,New England,1118703,5.3,5.3,1130084,1142586,1151182,1143978,5.3,5.3,5.3,1135638.75,New England
2,Connecticut,281659,1.3,1.3,284357,287560,288985,287711,1.3,1.3,1.3,285640.25,Connecticut
3,Maine,66590,0.3,0.3,67138,67905,68441,67793,0.3,0.3,0.3,67518.5,Maine
4,Massachusetts,586347,2.8,2.8,592588,599092,604208,600554,2.8,2.8,2.8,595558.75,Massachusetts
5,New Hampshire,87244,0.4,0.4,88147,89152,89836,89128,0.4,0.4,0.4,88594.75,New Hampshire
6,Rhode Island,62578,0.3,0.3,63242,63903,64441,63865,0.3,0.3,0.3,63541.0,Rhode Island
7,Vermont,34284,0.2,0.2,34612,34973,35271,34927,0.2,0.2,0.2,34785.0,Vermont
8,Mideast,3786089,17.9,17.9,3827030,3860095,3887642,3847859,17.9,17.9,17.9,3840214.0,Mideast
9,Delaware,74270,0.4,0.4,75216,75765,76410,76086,0.4,0.4,0.4,75415.25,Delaware


In [336]:
GDP2019_2020.addGeoLocColumn()

Unnamed: 0,Area,2019 Q1,2019 Q4%,2020 Q1%,2019 Q2,2019 Q3,2019 Q4,2020 Q1,2019 Q1 %,2019 Q2 %,2019 Q3 %,2019,Cleaned Area,Geo Loc
0,United States,21098827,100.0,100.0,21340267,21542540,21729124,21539689,100.0,100.0,100.0,21427689.5,United States,
1,New England,1118703,5.3,5.3,1130084,1142586,1151182,1143978,5.3,5.3,5.3,1135638.75,New England,
2,Connecticut,281659,1.3,1.3,284357,287560,288985,287711,1.3,1.3,1.3,285640.25,Connecticut,New England
3,Maine,66590,0.3,0.3,67138,67905,68441,67793,0.3,0.3,0.3,67518.5,Maine,New England
4,Massachusetts,586347,2.8,2.8,592588,599092,604208,600554,2.8,2.8,2.8,595558.75,Massachusetts,New England
5,New Hampshire,87244,0.4,0.4,88147,89152,89836,89128,0.4,0.4,0.4,88594.75,New Hampshire,New England
6,Rhode Island,62578,0.3,0.3,63242,63903,64441,63865,0.3,0.3,0.3,63541.0,Rhode Island,New England
7,Vermont,34284,0.2,0.2,34612,34973,35271,34927,0.2,0.2,0.2,34785.0,Vermont,New England
8,Mideast,3786089,17.9,17.9,3827030,3860095,3887642,3847859,17.9,17.9,17.9,3840214.0,Mideast,
9,Delaware,74270,0.4,0.4,75216,75765,76410,76086,0.4,0.4,0.4,75415.25,Delaware,Mideast


In [337]:
states = ["Alaska", "Alabama", "Arkansas", "Arizona", "California", "Colorado", "Connecticut", "District of Columbia",
          "Delaware", "Florida", "Georgia", "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana", "Kansas", "Kentucky", 
          "Louisiana", "Massachusetts", "Maryland", "Maine", "Michigan", "Minnesota", "Missouri", "Mississippi", "Montana", 
          "North Carolina", "North Dakota", "Nebraska", "New Hampshire", "New Jersey", "New Mexico", "Nevada", "New York", 
          "Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina", "South Dakota", "Tennessee", 
          "Texas", "Utah", "Virginia", "Vermont", "Washington", "Wisconsin", "West Virginia", "Wyoming"]
        
        # empty DataFrame to hold the new columns
GDPdf = pd.DataFrame()
        # count variable to increase the index by one after each row is added
count = 0
        
        # for loop to iterate through the years specified by the instance
for year in range(GDP2019_2020.yearStart, GDP2019_2020.yearEnd + 1):
            # for each state in that year, add in the following columns
    for state in states:
        GDPdf.loc[count, 'year'] = str(year)
        GDPdf.loc[count, 'state'] = state
        GDPdf.loc[count, 'current dollars'] = GDP2019_2020.dollars
        GDPdf.loc[count, 'GDP'] = GDP2019_2020.df.loc[GDP2019_2020.df['Cleaned Area'] == state, str(year)].item()
        GDPdf.loc[count, 'GDP_area'] = GDP2019_2020.df.loc[GDP2019_2020.df['Cleaned Area'] == state, 'Geo Loc'].item()
        count += 1

GDP2019_2020.df = GDPdf
GDP2019_2020.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2019,Alaska,2019,55406.25,Far West
1,2019,Alabama,2019,230968.25,Southeast
2,2019,Arkansas,2019,133180.75,Southeast
3,2019,Arizona,2019,366190.0,Southwest
4,2019,California,2019,3137469.0,Far West
5,2019,Colorado,2019,390284.0,Rocky Mountain
6,2019,Connecticut,2019,285640.25,New England
7,2019,District of Columbia,2019,146194.5,Mideast
8,2019,Delaware,2019,75415.25,Mideast
9,2019,Florida,2019,1093350.75,Southeast


# Combine Data Frames

In [338]:
# first, need to create a list of DataFrames to concatenate
dfs = [GDP95_01.df, GDP01_03.df, GDP04_06.df, GDP07_09.df, GDP10_12.df, GDP11_13.df,
       GDP_2014.df, GDP2015_2016.df, GDP2017_2018.df, GDP2019_2020.df]
dfs

[     year                 state current dollars        GDP        GDP_area
 0    1995                Alaska            2001    24791.0        Far West
 1    1995               Alabama            2001    95514.0       Southeast
 2    1995              Arkansas            2001    53809.0       Southeast
 3    1995               Arizona            2001   104586.0       Southwest
 4    1995            California            2001   925931.0        Far West
 5    1995              Colorado            2001   109021.0  Rocky Mountain
 6    1995           Connecticut            2001   118645.0     New England
 7    1995  District of Columbia            2001    48408.0         Mideast
 8    1995              Delaware            2001    27575.0         Mideast
 9    1995               Florida            2001   344771.0       Southeast
 10   1995               Georgia            2001   203505.0       Southeast
 11   1995                Hawaii            2001    37243.0        Far West
 12   1995  

In [339]:
# concatenate DataFrames into one DataFrame using pd.concat()
yearlyGDPbyState = pd.concat(dfs, ignore_index = True)
yearlyGDPbyState

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,1995,Alaska,2001,24791.0,Far West
1,1995,Alabama,2001,95514.0,Southeast
2,1995,Arkansas,2001,53809.0,Southeast
3,1995,Arizona,2001,104586.0,Southwest
4,1995,California,2001,925931.0,Far West
5,1995,Colorado,2001,109021.0,Rocky Mountain
6,1995,Connecticut,2001,118645.0,New England
7,1995,District of Columbia,2001,48408.0,Mideast
8,1995,Delaware,2001,27575.0,Mideast
9,1995,Florida,2001,344771.0,Southeast


# Writing DataFrame to CSV

## MAKE SURE TO CHANGE FILE LOCATION IF RUNNING ON YOUR MACHINE

In [340]:
# write the DataFrame to a CSV
file_location = 'C:/Users/Katie/Documents/MSDS Program/CS 5010/CS Project/LifeOfBrian-USHousing/DataSet/yearlyGDPbyState.csv'
yearlyGDPbyState.to_csv(file_location)