In [1]:
import tabula
import pandas as pd
import re
import math
from gdpPDFCleaning import *

# Lift pandas' display limits so DataFrames render every row and every column
# (None means "no limit" for both options).
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# GDP 95-00 Data

In [2]:
# Create the YearlyGDP instance for the 1995-2001 PDF.
# After restructureDataFrame() the first `year` value is 1995 (3rd argument) and
# '2001' (5th argument) appears as the `current dollars` value.
# NOTE(review): 8 is presumably the PDF page that holds the table and 2000 the last
# year to keep -- confirm against YearlyGDP in gdpPDFCleaning.
GDP95_01 = YearlyGDP('GDP1995-2001.pdf', 8, 1995, 2000, '2001')

In [3]:
# Inspect the `df` attribute right after construction; it is expected to still be an
# empty DataFrame because initializeDataFrame() has not been called yet.
GDP95_01.df

In [4]:
# initializeDataFrame() reads in the PDF, takes the first DataFrame in the resulting
# list as the df (there is only one table per page), renames the columns, and returns
# the DataFrame so that it is displayed here.
# In the output the 'Area' labels still carry dot leaders, row 0 is a leftover header
# fragment, and the last column holds two values fused into one string (e.g. '5.9 5.9').
GDP95_01.initializeDataFrame()

Unnamed: 0,Area,1995,1996,1997,1998,1999,2000,2001,NaN
0,,,,,,,,,2000 2001
1,United States.......................,7309516.0,7715901.0,8224960.0,8750174.0,9251541.0,9891187.0,10137190.0,100 100
2,New England....................,416166.0,439596.0,471336.0,503940.0,533324.0,582874.0,594686.0,5.9 5.9
3,Connecticut......................,118645.0,124157.0,134968.0,142701.0,149010.0,161929.0,166165.0,1.6 1.6
4,Maine...............................,27987.0,28925.0,30409.0,32208.0,34102.0,36276.0,37449.0,0.4 0.4
5,Massachusetts..................,197469.0,210127.0,223571.0,241369.0,257802.0,283072.0,287802.0,2.9 2.8
6,New Hampshire...............,32388.0,35068.0,37470.0,40529.0,43360.0,47385.0,47183.0,0.5 0.5
7,Rhode Island....................,25703.0,26656.0,29409.0,30838.0,31895.0,36086.0,36939.0,0.4 0.4
8,Vermont...........................,13974.0,14662.0,15510.0,16294.0,17155.0,18124.0,19149.0,0.2 0.2
9,Mideast..............................,1403270.0,1471796.0,1547124.0,1649536.0,1720155.0,1837583.0,1900223.0,18.6 18.7


In [5]:
# Add a 'Cleaned Area' column: the 'Area' labels with their trailing dot leaders
# removed (e.g. 'Maine......' -> 'Maine'); the original 'Area' column is kept.
GDP95_01.cleanStates('Area')

Unnamed: 0,Area,1995,1996,1997,1998,1999,2000,2001,NaN,Cleaned Area
0,,,,,,,,,2000 2001,
1,United States.......................,7309516.0,7715901.0,8224960.0,8750174.0,9251541.0,9891187.0,10137190.0,100 100,United States
2,New England....................,416166.0,439596.0,471336.0,503940.0,533324.0,582874.0,594686.0,5.9 5.9,New England
3,Connecticut......................,118645.0,124157.0,134968.0,142701.0,149010.0,161929.0,166165.0,1.6 1.6,Connecticut
4,Maine...............................,27987.0,28925.0,30409.0,32208.0,34102.0,36276.0,37449.0,0.4 0.4,Maine
5,Massachusetts..................,197469.0,210127.0,223571.0,241369.0,257802.0,283072.0,287802.0,2.9 2.8,Massachusetts
6,New Hampshire...............,32388.0,35068.0,37470.0,40529.0,43360.0,47385.0,47183.0,0.5 0.5,New Hampshire
7,Rhode Island....................,25703.0,26656.0,29409.0,30838.0,31895.0,36086.0,36939.0,0.4 0.4,Rhode Island
8,Vermont...........................,13974.0,14662.0,15510.0,16294.0,17155.0,18124.0,19149.0,0.2 0.2,Vermont
9,Mideast..............................,1403270.0,1471796.0,1547124.0,1649536.0,1720155.0,1837583.0,1900223.0,18.6 18.7,Mideast


In [6]:
# Add a 'Geo Loc' column derived from 'Cleaned Area': each state row is tagged with
# its region (e.g. Connecticut -> New England), while the national and region rows
# themselves are left blank.
GDP95_01.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,1995,1996,1997,1998,1999,2000,2001,NaN,Cleaned Area,Geo Loc
0,,,,,,,,,2000 2001,,
1,United States.......................,7309516.0,7715901.0,8224960.0,8750174.0,9251541.0,9891187.0,10137190.0,100 100,United States,
2,New England....................,416166.0,439596.0,471336.0,503940.0,533324.0,582874.0,594686.0,5.9 5.9,New England,
3,Connecticut......................,118645.0,124157.0,134968.0,142701.0,149010.0,161929.0,166165.0,1.6 1.6,Connecticut,New England
4,Maine...............................,27987.0,28925.0,30409.0,32208.0,34102.0,36276.0,37449.0,0.4 0.4,Maine,New England
5,Massachusetts..................,197469.0,210127.0,223571.0,241369.0,257802.0,283072.0,287802.0,2.9 2.8,Massachusetts,New England
6,New Hampshire...............,32388.0,35068.0,37470.0,40529.0,43360.0,47385.0,47183.0,0.5 0.5,New Hampshire,New England
7,Rhode Island....................,25703.0,26656.0,29409.0,30838.0,31895.0,36086.0,36939.0,0.4 0.4,Rhode Island,New England
8,Vermont...........................,13974.0,14662.0,15510.0,16294.0,17155.0,18124.0,19149.0,0.2 0.2,Vermont,New England
9,Mideast..............................,1403270.0,1471796.0,1547124.0,1649536.0,1720155.0,1837583.0,1900223.0,18.6 18.7,Mideast,


In [7]:
# Reshape the wide table into long form with the columns
# year / state / current dollars / GDP / GDP_area (one row per state and year in the
# rows shown); the region comes from the 'Geo Loc' column added above.
GDP95_01.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,1995,Alaska,2001,24791.0,Far West
1,1995,Alabama,2001,95514.0,Southeast
2,1995,Arkansas,2001,53809.0,Southeast
3,1995,Arizona,2001,104586.0,Southwest
4,1995,California,2001,925931.0,Far West
5,1995,Colorado,2001,109021.0,Rocky Mountain
6,1995,Connecticut,2001,118645.0,New England
7,1995,District of Columbia,2001,48408.0,Mideast
8,1995,Delaware,2001,27575.0,Mideast
9,1995,Florida,2001,344771.0,Southeast


In [8]:
# Display the stored df to confirm the restructured frame was saved on the instance
# (the output matches what restructureDataFrame() returned).
GDP95_01.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,1995,Alaska,2001,24791.0,Far West
1,1995,Alabama,2001,95514.0,Southeast
2,1995,Arkansas,2001,53809.0,Southeast
3,1995,Arizona,2001,104586.0,Southwest
4,1995,California,2001,925931.0,Far West
5,1995,Colorado,2001,109021.0,Rocky Mountain
6,1995,Connecticut,2001,118645.0,New England
7,1995,District of Columbia,2001,48408.0,Mideast
8,1995,Delaware,2001,27575.0,Mideast
9,1995,Florida,2001,344771.0,Southeast


# GDP 01-03 Data

In [9]:
# Create the YearlyGDP instance for the 2001-2003 PDF.
# After restructureDataFrame() the first `year` value is 2001 (3rd argument) and
# '2004' (5th argument) appears as the `current dollars` value.
# NOTE(review): 5 is presumably the PDF page that holds the table and 2003 the last
# year to keep -- confirm against YearlyGDP in gdpPDFCleaning.
GDP01_03 = YearlyGDP('GDP2001-2003.pdf', 5, 2001, 2003, '2004')

In [10]:
# Inspect the `df` attribute right after construction; it is expected to still be an
# empty DataFrame because initializeDataFrame() has not been called yet.
GDP01_03.df

In [11]:
# initializeDataFrame() reads in the PDF, takes the first DataFrame in the resulting
# list as the df (there is only one table per page), renames the columns, and returns
# the DataFrame so that it is displayed here.
# In the output two pairs of adjacent columns come back fused into single string
# columns ('2002 2003' and '2002 2003.1'); they are split in the next two cells.
GDP01_03.initializeDataFrame()

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5


In [12]:
# Split the fused '2002 2003' column (zero-based position 2, counting from 'Area')
# into new '2002' and '2003' columns; the output shows the thousands separators
# removed (e.g. "596,017 620,136" -> 596017 and 620136). The fused column is kept.
GDP01_03.splitColumns(2, '2002', '2003')

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1,2002,2003
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0,10412244,10923849
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7,596017,620136
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6,167235,174085
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4,39027,40829
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7,287191,297113
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4,46106,48202
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4,37040,39363
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2,19419,20544
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4,1922516,2010011
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5,46991,50486


In [13]:
# Split the second fused column, '2002 2003.1' (zero-based position 5), into new
# '2002 %' and '2003 %' columns (e.g. "5.7 5.7" -> 5.7 and 5.7).
GDP01_03.splitColumns(5, '2002 %', '2003 %')

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1,2002,2003,2002 %,2003 %
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0,10412244,10923849,100.0,100.0
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7,596017,620136,5.7,5.7
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6,167235,174085,1.6,1.6
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4,39027,40829,0.4,0.4
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7,287191,297113,2.8,2.7
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4,46106,48202,0.4,0.4
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4,37040,39363,0.4,0.4
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2,19419,20544,0.2,0.2
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4,1922516,2010011,18.5,18.4
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5,46991,50486,0.5,0.5


In [14]:
# Add a 'Cleaned Area' column: the 'Area' labels with the trailing space and dot
# leaders removed (e.g. 'Maine .....' -> 'Maine'); the original 'Area' column is kept.
GDP01_03.cleanStates('Area')

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1,2002,2003,2002 %,2003 %,Cleaned Area
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0,10412244,10923849,100.0,100.0,United States
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7,596017,620136,5.7,5.7,New England
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6,167235,174085,1.6,1.6,Connecticut
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4,39027,40829,0.4,0.4,Maine
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7,287191,297113,2.8,2.7,Massachusetts
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4,46106,48202,0.4,0.4,New Hampshire
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4,37040,39363,0.4,0.4,Rhode Island
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2,19419,20544,0.2,0.2,Vermont
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4,1922516,2010011,18.5,18.4,Mideast
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5,46991,50486,0.5,0.5,Delaware


In [15]:
# Add a 'Geo Loc' column derived from 'Cleaned Area': each state row is tagged with
# its region (e.g. Delaware -> Mideast), while the national and region rows
# themselves are left blank.
GDP01_03.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2001,2002 2003,2004,2001.0,2002 2003.1,2004.1,2002,2003,2002 %,2003 %,Cleaned Area,Geo Loc
0,United States .........,10058156,"10,412,244 10,923,849",11649827,100.0,100.0 100.0,100.0,10412244,10923849,100.0,100.0,United States,
1,New England ............,584487,"596,017 620,136",664181,5.8,5.7 5.7,5.7,596017,620136,5.7,5.7,New England,
2,Connecticut .............,165434,"167,235 174,085",187086,1.6,1.6 1.6,1.6,167235,174085,1.6,1.6,Connecticut,New England
3,Maine .....................,37094,"39,027 40,829",43279,0.4,0.4 0.4,0.4,39027,40829,0.4,0.4,Maine,New England
4,Massachusetts .........,283422,"287,191 297,113",317684,2.8,2.8 2.7,2.7,287191,297113,2.8,2.7,Massachusetts,New England
5,New Hampshire .........,44394,"46,106 48,202",52097,0.4,0.4 0.4,0.4,46106,48202,0.4,0.4,New Hampshire,New England
6,Rhode Island ............,35489,"37,040 39,363",41921,0.4,0.4 0.4,0.4,37040,39363,0.4,0.4,Rhode Island,New England
7,Vermont ..................,18656,"19,419 20,544",22114,0.2,0.2 0.2,0.2,19419,20544,0.2,0.2,Vermont,New England
8,Mideast .....................,1868057,"1,922,516 2,010,011",2140662,18.6,18.5 18.4,18.4,1922516,2010011,18.5,18.4,Mideast,
9,Delaware ..................,45049,"46,991 50,486",54500,0.4,0.5 0.5,0.5,46991,50486,0.5,0.5,Delaware,Mideast


In [16]:
# Reshape the wide table into long form with the columns
# year / state / current dollars / GDP / GDP_area (one row per state and year in the
# rows shown); the region comes from the 'Geo Loc' column added above.
GDP01_03.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2001,Alaska,2004,27358.0,Far West
1,2001,Alabama,2004,118263.0,Southeast
2,2001,Arkansas,2004,68574.0,Southeast
3,2001,Arizona,2004,164263.0,Southwest
4,2001,California,2004,1307880.0,Far West
5,2001,Colorado,2004,177526.0,Rocky Mountain
6,2001,Connecticut,2004,165434.0,New England
7,2001,District of Columbia,2004,63223.0,Mideast
8,2001,Delaware,2004,45049.0,Mideast
9,2001,Florida,2004,496861.0,Southeast


In [17]:
# Display the stored df to confirm the restructured frame was saved on the instance
# (the output matches what restructureDataFrame() returned).
GDP01_03.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2001,Alaska,2004,27358.0,Far West
1,2001,Alabama,2004,118263.0,Southeast
2,2001,Arkansas,2004,68574.0,Southeast
3,2001,Arizona,2004,164263.0,Southwest
4,2001,California,2004,1307880.0,Far West
5,2001,Colorado,2004,177526.0,Rocky Mountain
6,2001,Connecticut,2004,165434.0,New England
7,2001,District of Columbia,2004,63223.0,Mideast
8,2001,Delaware,2004,45049.0,Mideast
9,2001,Florida,2004,496861.0,Southeast


# GDP 04-06 Data

In [18]:
# Create the YearlyGDP instance for the 2004-2006 PDF.
# After restructureDataFrame() the first `year` value is 2004 (3rd argument) and
# '2007' (5th argument) appears as the `current dollars` value.
# NOTE(review): 11 is presumably the PDF page that holds the table and 2006 the last
# year to keep -- confirm against YearlyGDP in gdpPDFCleaning.
GDP04_06 = YearlyGDP('GDP2004-2006.pdf', 11, 2004, 2006, '2007')

In [19]:
# Inspect the `df` attribute right after construction; it is expected to still be an
# empty DataFrame because initializeDataFrame() has not been called yet.
GDP04_06.df

In [20]:
# initializeDataFrame() reads in the PDF, takes the first DataFrame in the resulting
# list as the df (there is only one table per page), renames the columns, and returns
# the DataFrame so that it is displayed here.
# In the output two pairs of adjacent columns come back fused into single string
# columns ('2005 2006' and '2005 2006.1'); they are split in the next two cells.
GDP04_06.initializeDataFrame()

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4


In [21]:
# Split the fused '2005 2006' column (zero-based position 2, counting from 'Area')
# into new '2005' and '2006' columns; the output shows the thousands separators
# removed (e.g. "674,562 712,051" -> 674562 and 712051). The fused column is kept.
GDP04_06.splitColumns(2, '2005', '2006')

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1,2005,2006
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0,12346871,13119938
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4,674562,712051
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6,193281,204964
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4,44364,46340
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6,317626,335313
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4,53468,56073
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3,43078,45733
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2,22745,23628
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4,2245718,2390856
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4,57334,59589


In [22]:
# Split the second fused column, '2005 2006.1' (zero-based position 5), into new
# '2005 %' and '2006 %' columns (e.g. "5.5 5.4" -> 5.5 and 5.4).
GDP04_06.splitColumns(5, '2005 %', '2006 %')

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1,2005,2006,2005 %,2006 %
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0,12346871,13119938,100.0,100.0
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4,674562,712051,5.5,5.4
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6,193281,204964,1.6,1.6
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4,44364,46340,0.4,0.4
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6,317626,335313,2.6,2.6
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4,53468,56073,0.4,0.4
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3,43078,45733,0.3,0.3
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2,22745,23628,0.2,0.2
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4,2245718,2390856,18.2,18.2
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4,57334,59589,0.5,0.5


In [23]:
# Add a 'Cleaned Area' column: the 'Area' labels with their trailing dot leaders
# removed (e.g. 'Maine......' -> 'Maine'); the original 'Area' column is kept.
GDP04_06.cleanStates('Area')

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1,2005,2006,2005 %,2006 %,Cleaned Area
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0,12346871,13119938,100.0,100.0,United States
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4,674562,712051,5.5,5.4,New England
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6,193281,204964,1.6,1.6,Connecticut
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4,44364,46340,0.4,0.4,Maine
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6,317626,335313,2.6,2.6,Massachusetts
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4,53468,56073,0.4,0.4,New Hampshire
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3,43078,45733,0.3,0.3,Rhode Island
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2,22745,23628,0.2,0.2,Vermont
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4,2245718,2390856,18.2,18.2,Mideast
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4,57334,59589,0.5,0.5,Delaware


In [24]:
# Add a 'Geo Loc' column derived from 'Cleaned Area': each state row is tagged with
# its region (e.g. Delaware -> Mideast), while the national and region rows
# themselves are left blank.
GDP04_06.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2004,2005 2006,2007,2004.0,2005 2006.1,2007.1,2005,2006,2005 %,2006 %,Cleaned Area,Geo Loc
0,United States...............,11607041,"12,346,871 13,119,938",13743021,100.0,100.0 100.0,100.0,12346871,13119938,100.0,100.0,United States,
1,New England..................,647473,"674,562 712,051",744672,5.6,5.5 5.4,5.4,674562,712051,5.5,5.4,New England,
2,Connecticut.....................,182112,"193,281 204,964",216266,1.6,1.6 1.6,1.6,193281,204964,1.6,1.6,Connecticut,New England
3,Maine............................,43191,"44,364 46,340",48108,0.4,0.4 0.4,0.4,44364,46340,0.4,0.4,Maine,New England
4,Massachusetts..................,306827,"317,626 335,313",351514,2.6,2.6 2.6,2.6,317626,335313,2.6,2.6,Massachusetts,New England
5,New Hampshire...............,51432,"53,468 56,073",57341,0.4,0.4 0.4,0.4,53468,56073,0.4,0.4,New Hampshire,New England
6,Rhode Island..................,42073,"43,078 45,733",46900,0.4,0.3 0.3,0.3,43078,45733,0.3,0.3,Rhode Island,New England
7,Vermont........................,21839,"22,745 23,628",24543,0.2,0.2 0.2,0.2,22745,23628,0.2,0.2,Vermont,New England
8,Mideast...........................,2124891,"2,245,718 2,390,856",2522240,18.3,18.2 18.2,18.4,2245718,2390856,18.2,18.2,Mideast,
9,Delaware........................,52305,"57,334 59,589",60118,0.5,0.5 0.5,0.4,57334,59589,0.5,0.5,Delaware,Mideast


In [25]:
# Reshape the wide table into long form with the columns
# year / state / current dollars / GDP / GDP_area (one row per state and year in the
# rows shown); the region comes from the 'Geo Loc' column added above.
GDP04_06.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2004,Alaska,2007,35102.0,Far West
1,2004,Alabama,2007,141527.0,Southeast
2,2004,Arkansas,2007,82137.0,Southeast
3,2004,Arizona,2007,193448.0,Southwest
4,2004,California,2007,1519443.0,Far West
5,2004,Colorado,2007,197329.0,Rocky Mountain
6,2004,Connecticut,2007,182112.0,New England
7,2004,District of Columbia,2007,77913.0,Mideast
8,2004,Delaware,2007,52305.0,Mideast
9,2004,Florida,2007,607284.0,Southeast


In [26]:
# Display the stored df to confirm the restructured frame was saved on the instance
# (the output matches what restructureDataFrame() returned).
GDP04_06.df

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2004,Alaska,2007,35102.0,Far West
1,2004,Alabama,2007,141527.0,Southeast
2,2004,Arkansas,2007,82137.0,Southeast
3,2004,Arizona,2007,193448.0,Southwest
4,2004,California,2007,1519443.0,Far West
5,2004,Colorado,2007,197329.0,Rocky Mountain
6,2004,Connecticut,2007,182112.0,New England
7,2004,District of Columbia,2007,77913.0,Mideast
8,2004,Delaware,2007,52305.0,Mideast
9,2004,Florida,2007,607284.0,Southeast


# GDP 07-09 Data

In [27]:
# Create the YearlyGDP instance for the 2007-2009 PDF.
# After restructureDataFrame() the first `year` value is 2007 (3rd argument) and
# '2010' (5th argument) appears as the `current dollars` value.
# NOTE(review): 9 is presumably the PDF page that holds the table and 2009 the last
# year to keep -- confirm against YearlyGDP in gdpPDFCleaning.
GDP07_09 = YearlyGDP('GDP2007-2009.pdf', 9, 2007, 2009, '2010')

In [28]:
# Inspect the `df` attribute right after construction; it is expected to still be an
# empty DataFrame because initializeDataFrame() has not been called yet.
GDP07_09.df

In [29]:
# initializeDataFrame() reads in the PDF, takes the first DataFrame in the resulting
# list as the df (there is only one table per page), renames the columns, and returns
# the DataFrame so that it is displayed here.
# In the output two pairs of adjacent columns come back fused into single string
# columns ('2008 2009' and '2008 2009.1'); they are split in the next two cells.
GDP07_09.initializeDataFrame()

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4


In [30]:
# Split the fused '2008 2009' column (zero-based position 2, counting from 'Area')
# into new '2008' and '2009' columns; the output shows the thousands separators
# removed (e.g. "772,347 769,308" -> 772347 and 769308). The fused column is kept.
GDP07_09.splitColumns(2, '2008', '2009')

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1,2008,2009
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0,14270462,14014849
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5,772347,769308
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6,225958,227550
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4,49972,50039
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6,365623,360538
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4,58780,59086
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3,47378,47470
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2,24636,24625
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4,2575532,2557257
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4,58674,60660


In [31]:
# Split the second fused column, '2008 2009.1' (zero-based position 5), into new
# '2008 %' and '2009 %' columns (e.g. "5.4 5.5" -> 5.4 and 5.5).
GDP07_09.splitColumns(5, '2008 %', '2009 %')

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1,2008,2009,2008 %,2009 %
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0,14270462,14014849,100.0,100.0
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5,772347,769308,5.4,5.5
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6,225958,227550,1.6,1.6
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4,49972,50039,0.4,0.4
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6,365623,360538,2.6,2.6
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4,58780,59086,0.4,0.4
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3,47378,47470,0.3,0.3
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2,24636,24625,0.2,0.2
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4,2575532,2557257,18.0,18.2
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4,58674,60660,0.4,0.4


In [32]:
# Add a 'Cleaned Area' column: the 'Area' labels with their trailing dot leaders
# removed (e.g. 'Maine......' -> 'Maine'); the original 'Area' column is kept.
GDP07_09.cleanStates('Area')

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1,2008,2009,2008 %,2009 %,Cleaned Area
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0,14270462,14014849,100.0,100.0,United States
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5,772347,769308,5.4,5.5,New England
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6,225958,227550,1.6,1.6,Connecticut
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4,49972,50039,0.4,0.4,Maine
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6,365623,360538,2.6,2.6,Massachusetts
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4,58780,59086,0.4,0.4,New Hampshire
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3,47378,47470,0.3,0.3,Rhode Island
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2,24636,24625,0.2,0.2,Vermont
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4,2575532,2557257,18.0,18.2,Mideast
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4,58674,60660,0.4,0.4,Delaware


In [33]:
# Add a 'Geo Loc' column derived from 'Cleaned Area': each state row is tagged with
# its region (e.g. Delaware -> Mideast), while the national and region rows
# themselves are left blank.
GDP07_09.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2007,2008 2009,2010,2007.0,2008 2009.1,2010.1,2008,2009,2008 %,2009 %,Cleaned Area,Geo Loc
0,United States..................,13969323,"14,270,462 14,014,849",14551782,100.0,100.0 100.0,100.0,14270462,14014849,100.0,100.0,United States,
1,New England......................,754306,"772,347 769,308",802771,5.4,5.4 5.5,5.5,772347,769308,5.4,5.5,New England,
2,Connecticut........................,222498,"225,958 227,550",237261,1.6,1.6 1.6,1.6,225958,227550,1.6,1.6,Connecticut,New England
3,Maine.................................,49195,"49,972 50,039",51643,0.4,0.4 0.4,0.4,49972,50039,0.4,0.4,Maine,New England
4,Massachusetts...................,353329,"365,623 360,538",378729,2.5,2.6 2.6,2.6,365623,360538,2.6,2.6,Massachusetts,New England
5,New Hampshire.................,57856,"58,780 59,086",60283,0.4,0.4 0.4,0.4,58780,59086,0.4,0.4,New Hampshire,New England
6,Rhode Island.....................,47334,"47,378 47,470",49234,0.3,0.3 0.3,0.3,47378,47470,0.3,0.3,Rhode Island,New England
7,Vermont.............................,24093,"24,636 24,625",25620,0.2,0.2 0.2,0.2,24636,24625,0.2,0.2,Vermont,New England
8,Mideast...............................,2515483,"2,575,532 2,557,257",2677426,18.0,18.0 18.2,18.4,2575532,2557257,18.0,18.2,Mideast,
9,Delaware...........................,60108,"58,674 60,660",62280,0.4,0.4 0.4,0.4,58674,60660,0.4,0.4,Delaware,Mideast


In [34]:
# Reshape the wide table into long form with the columns
# year / state / current dollars / GDP / GDP_area (one row per state and year in the
# rows shown); the region comes from the 'Geo Loc' column added above.
GDP07_09.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2007,Alaska,2010,44587.0,Far West
1,2007,Alabama,2010,165981.0,Southeast
2,2007,Arkansas,2010,97187.0,Southeast
3,2007,Arizona,2010,260122.0,Southwest
4,2007,California,2010,1874783.0,Far West
5,2007,Colorado,2010,242900.0,Rocky Mountain
6,2007,Connecticut,2010,222498.0,New England
7,2007,District of Columbia,2010,92342.0,Mideast
8,2007,Delaware,2010,60108.0,Mideast
9,2007,Florida,2010,759572.0,Southeast


# GDP 10-12 Data

In [54]:
# Create the YearlyGDP instance for the 2010-2012 PDF.
# After restructureDataFrame() the first `year` value is 2010 (3rd argument) and
# '2013' (5th argument) appears as the `current dollars` value.
# NOTE(review): 10 is presumably the PDF page that holds the table and 2012 the last
# year to keep -- confirm against YearlyGDP in gdpPDFCleaning.
# NOTE(review): the execution counter jumps from In [34] to In [54] at this cell, so
# the saved outputs were not produced by one top-to-bottom run -- verify with
# Restart Kernel -> Run All.
GDP10_12 = YearlyGDP('GDP2010-2012.pdf', 10, 2010, 2012, '2013')

In [55]:
# Inspect the `df` attribute right after construction; it is expected to still be an
# empty DataFrame because initializeDataFrame() has not been called yet.
GDP10_12.df

In [56]:
# initializeDataFrame() reads in the PDF, takes the first DataFrame in the resulting
# list as the df (there is only one table per page), renames the columns, and returns
# the DataFrame so that it is displayed here.
# In the output two pairs of adjacent columns come back fused into single string
# columns ('2011 2012' and '2011 2012.1'), and the first 'Area' label carries a
# footnote marker ('United States/1/...').
GDP10_12.initializeDataFrame()

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4


In [57]:
# Split the fused '2011 2012' column (zero-based position 2, counting from 'Area')
# into new '2011' and '2012' columns; the output shows the thousands separators
# removed (e.g. "842,182 874,201" -> 842182 and 874201). The fused column is kept.
GDP10_12.splitColumns(2, '2011', '2012')

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1,2011,2012
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0,15431583,16141152
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4,842182,874201
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5,235121,242930
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3,51756,53235
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7,413716,431937
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4,64122,66111
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3,49921,51566
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2,27545,28422
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1,2841050,2948377
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4,58612,60650


In [58]:
# Split the second fused column, '2011 2012.1' (zero-based position 5), into new
# '2011 %' and '2012 %' columns (e.g. "5.5 5.4" -> 5.5 and 5.4).
GDP10_12.splitColumns(5, '2011 %', '2012 %')

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1,2011,2012,2011 %,2012 %
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0,15431583,16141152,100.0,100.0
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4,842182,874201,5.5,5.4
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5,235121,242930,1.5,1.5
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3,51756,53235,0.3,0.3
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7,413716,431937,2.7,2.7
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4,64122,66111,0.4,0.4
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3,49921,51566,0.3,0.3
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2,27545,28422,0.2,0.2
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1,2841050,2948377,18.4,18.3
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4,58612,60650,0.4,0.4


In [59]:
# Add a 'Cleaned Area' column: the 'Area' labels with the dot leaders removed; the
# output shows the '/1/' footnote marker is stripped as well
# ('United States/1/...' -> 'United States'). The original 'Area' column is kept.
GDP10_12.cleanStates('Area')

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1,2011,2012,2011 %,2012 %,Cleaned Area
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0,15431583,16141152,100.0,100.0,United States
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4,842182,874201,5.5,5.4,New England
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5,235121,242930,1.5,1.5,Connecticut
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3,51756,53235,0.3,0.3,Maine
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7,413716,431937,2.7,2.7,Massachusetts
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4,64122,66111,0.4,0.4,New Hampshire
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3,49921,51566,0.3,0.3,Rhode Island
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2,27545,28422,0.2,0.2,Vermont
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1,2841050,2948377,18.4,18.3,Mideast
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4,58612,60650,0.4,0.4,Delaware


In [60]:
# Add a 'Geo Loc' column derived from 'Cleaned Area': each state row is tagged with
# its region (e.g. Delaware -> Mideast), while the national and region rows
# themselves are left blank.
GDP10_12.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2010,2011 2012,2013,2010.0,2011 2012.1,2013.1,2011,2012,2011 %,2012 %,Cleaned Area,Geo Loc
0,United States/1/...............,14862637,"15,431,583 16,141,152",16701415,100.0,100.0 100.0,100.0,15431583,16141152,100.0,100.0,United States,
1,New England ...............,823311,"842,182 874,201",900870,5.5,5.5 5.4,5.4,842182,874201,5.5,5.4,New England,
2,Connecticut ...............,233781,"235,121 242,930",249251,1.6,1.5 1.5,1.5,235121,242930,1.5,1.5,Connecticut,New England
3,Maine .......................,51470,"51,756 53,235",54755,0.3,0.3 0.3,0.3,51756,53235,0.3,0.3,Maine,New England
4,Massachusetts ...........,399603,"413,716 431,937",446323,2.7,2.7 2.7,2.7,413716,431937,2.7,2.7,Massachusetts,New England
5,New Hampshire .........,62622,"64,122 66,111",67848,0.4,0.4 0.4,0.4,64122,66111,0.4,0.4,New Hampshire,New England
6,Rhode Island .............,49265,"49,921 51,566",53184,0.3,0.3 0.3,0.3,49921,51566,0.3,0.3,Rhode Island,New England
7,Vermont ....................,26570,"27,545 28,422",29509,0.2,0.2 0.2,0.2,27545,28422,0.2,0.2,Vermont,New England
8,Mideast ......................,2766975,"2,841,050 2,948,377",3017145,18.6,18.4 18.3,18.1,2841050,2948377,18.4,18.3,Mideast,
9,Delaware ..................,57628,"58,612 60,650",62703,0.4,0.4 0.4,0.4,58612,60650,0.4,0.4,Delaware,Mideast


In [61]:
# reshape the wide table into the final layout shown below, with columns
# year, state, current dollars, GDP and GDP_area (the preview shows the 2010 rows)
# NOTE(review): the result is presumably stored back on GDP10_12.df, since the
# combine step later concatenates the .df attributes -- confirm in gdpPDFCleaning
GDP10_12.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2010,Alaska,2013,53251.0,Far West
1,2010,Alabama,2013,175734.0,Southeast
2,2010,Arkansas,2013,111355.0,Southeast
3,2010,Arizona,2013,247752.0,Southwest
4,2010,California,2013,1953411.0,Far West
5,2010,Colorado,2013,256628.0,Rocky Mountain
6,2010,Connecticut,2013,233781.0,New England
7,2010,District of Columbia,2013,106615.0,Mideast
8,2010,Delaware,2013,57628.0,Mideast
9,2010,Florida,2013,728604.0,Southeast


# GDP 11-13 Data

In [62]:
# create instance of yearly GDP data for the 2011-2013 PDF
# NOTE(review): argument meanings below are inferred from the cell outputs, not from
# the YearlyGDP source -- confirm against gdpPDFCleaning:
#   9          presumably the PDF page that holds the table
#   2013, 2013 look like the first/last year to keep (the restructured preview
#              further down only shows 2013 rows)
#   '2014'     appears as the 'current dollars' value in the restructured output
GDP11_13 = YearlyGDP('GDP2011-2013.pdf', 9, 2013, 2013, '2014')

In [63]:
# sanity check: before initializeDataFrame() has been called, .df is expected to be
# an empty DataFrame
GDP11_13.df

In [64]:
# initializeDataFrame() method reads in the PDF, assigns the first DataFrame in the
# list as the df (only one table per page), and renames the columns. Lastly, it returns
# the DataFrame for viewing.
# Note that in this table the 2012 and 2013 values come out fused into single
# columns ('2012 2013' and '2012 2013.1'); the splitColumns() calls that follow
# separate them.
GDP11_13.initializeDataFrame()

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4


In [65]:
# split the fused column at position 2 ('2012 2013') into new '2012' and '2013'
# columns; in the output below the new values have no thousands separators
# (e.g. "16,060,678 16,665,215" -> 16060678 and 16665215) and the original fused
# column is still present
GDP11_13.splitColumns(2, '2012', '2013')

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1,2012,2013
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0,16060678,16665215
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3,867593,893812
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5,239878,246897
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3,53180,54609
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7,428350,441467
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4,66490,68701
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3,51346,53300
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2,28348,28838
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2,2944778,3031201
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4,59104,60816


In [66]:
# same split for the fused column at position 5 ('2012 2013.1'), producing
# '2012 %' and '2013 %' (e.g. "5.4 5.4" -> 5.4 and 5.4)
# NOTE(review): these look like each area's percentage of the U.S. total (the
# United States row is 100.0) -- confirm against the source PDF
GDP11_13.splitColumns(5, '2012 %', '2013 %')

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1,2012,2013,2012 %,2013 %
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0,16060678,16665215,100.0,100.0
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3,867593,893812,5.4,5.4
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5,239878,246897,1.5,1.5
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3,53180,54609,0.3,0.3
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7,428350,441467,2.7,2.6
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4,66490,68701,0.4,0.4
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3,51346,53300,0.3,0.3
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2,28348,28838,0.2,0.2
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2,2944778,3031201,18.3,18.2
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4,59104,60816,0.4,0.4


In [67]:
# add a 'Cleaned Area' column holding the area names from 'Area' with extraction
# artifacts removed; in the output below 'United States1' (trailing footnote
# marker) becomes 'United States'
GDP11_13.cleanStates('Area')

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1,2012,2013,2012 %,2013 %,Cleaned Area
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0,16060678,16665215,100.0,100.0,United States
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3,867593,893812,5.4,5.4,New England
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5,239878,246897,1.5,1.5,Connecticut
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3,53180,54609,0.3,0.3,Maine
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7,428350,441467,2.7,2.6,Massachusetts
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4,66490,68701,0.4,0.4,New Hampshire
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3,51346,53300,0.3,0.3,Rhode Island
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2,28348,28838,0.2,0.2,Vermont
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2,2944778,3031201,18.3,18.2,Mideast
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4,59104,60816,0.4,0.4,Delaware


In [68]:
# add a 'Geo Loc' column derived from 'Cleaned Area'; in the output below, state rows
# carry their region (e.g. Delaware -> Mideast) while the national and region rows
# show no value
GDP11_13.addGeoLocColumn('Cleaned Area')

Unnamed: 0,Area,2011,2012 2013,2014,2011.0,2012 2013.1,2014.1,2012,2013,2012 %,2013 %,Cleaned Area,Geo Loc
0,United States1,15416873,"16,060,678 16,665,215",17316314,100.0,100.0 100.0,100.0,16060678,16665215,100.0,100.0,United States,
1,New England,840420,"867,593 893,812",924937,5.5,5.4 5.4,5.3,867593,893812,5.4,5.4,New England,
2,Connecticut,233960,"239,878 246,897",253036,1.5,1.5 1.5,1.5,239878,246897,1.5,1.5,Connecticut,New England
3,Maine,52007,"53,180 54,609",55838,0.3,0.3 0.3,0.3,53180,54609,0.3,0.3,Maine,New England
4,Massachusetts,412653,"428,350 441,467",459937,2.7,2.7 2.6,2.7,428350,441467,2.7,2.6,Massachusetts,New England
5,New Hampshire,64246,"66,490 68,701",71552,0.4,0.4 0.4,0.4,66490,68701,0.4,0.4,New Hampshire,New England
6,Rhode Island,49932,"51,346 53,300",54960,0.3,0.3 0.3,0.3,51346,53300,0.3,0.3,Rhode Island,New England
7,Vermont,27622,"28,348 28,838",29613,0.2,0.2 0.2,0.2,28348,28838,0.2,0.2,Vermont,New England
8,Mideast,2826317,"2,944,778 3,031,201",3143366,18.3,18.3 18.2,18.2,2944778,3031201,18.3,18.2,Mideast,
9,Delaware,59275,"59,104 60,816",62756,0.4,0.4 0.4,0.4,59104,60816,0.4,0.4,Delaware,Mideast


In [69]:
# reshape the wide table into the final layout shown below, with columns
# year, state, current dollars, GDP and GDP_area; the preview shows 2013 rows with
# '2014' as the current-dollars value, and the GDP figures match the '2013' column
# created by splitColumns() (e.g. Connecticut 246897)
GDP11_13.restructureDataFrame('Cleaned Area')

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,2013,Alaska,2014,57276.0,Far West
1,2013,Alabama,2014,194671.0,Southeast
2,2013,Arkansas,2014,118553.0,Southeast
3,2013,Arizona,2014,274734.0,Southwest
4,2013,California,2014,2212991.0,Far West
5,2013,Colorado,2014,288338.0,Rocky Mountain
6,2013,Connecticut,2014,246897.0,New England
7,2013,District of Columbia,2014,111695.0,Mideast
8,2013,Delaware,2014,60816.0,Mideast
9,2013,Florida,2014,800697.0,Southeast


# Combine Data Frames

In [70]:
# gather the restructured DataFrame held by each period's instance, in the same
# order as the sections above, so they can be concatenated in the next cell
dfs = [
    period.df
    for period in (GDP95_01, GDP01_03, GDP04_06, GDP07_09, GDP10_12, GDP11_13)
]
dfs

[     year                 state current dollars        GDP        GDP_area
 0    1995                Alaska            2001    24791.0        Far West
 1    1995               Alabama            2001    95514.0       Southeast
 2    1995              Arkansas            2001    53809.0       Southeast
 3    1995               Arizona            2001   104586.0       Southwest
 4    1995            California            2001   925931.0        Far West
 5    1995              Colorado            2001   109021.0  Rocky Mountain
 6    1995           Connecticut            2001   118645.0     New England
 7    1995  District of Columbia            2001    48408.0         Mideast
 8    1995              Delaware            2001    27575.0         Mideast
 9    1995               Florida            2001   344771.0       Southeast
 10   1995               Georgia            2001   203505.0       Southeast
 11   1995                Hawaii            2001    37243.0        Far West
 12   1995  

In [71]:
# stack the per-period frames row-wise into a single DataFrame; ignore_index=True
# discards each frame's own row labels and renumbers the combined rows from 0
yearlyGDPbyState = pd.concat(dfs, ignore_index=True)
yearlyGDPbyState

Unnamed: 0,year,state,current dollars,GDP,GDP_area
0,1995,Alaska,2001,24791.0,Far West
1,1995,Alabama,2001,95514.0,Southeast
2,1995,Arkansas,2001,53809.0,Southeast
3,1995,Arizona,2001,104586.0,Southwest
4,1995,California,2001,925931.0,Far West
5,1995,Colorado,2001,109021.0,Rocky Mountain
6,1995,Connecticut,2001,118645.0,New England
7,1995,District of Columbia,2001,48408.0,Mideast
8,1995,Delaware,2001,27575.0,Mideast
9,1995,Florida,2001,344771.0,Southeast


In [72]:
# write the combined DataFrame to a CSV (path is relative to the working directory)
# NOTE(review): to_csv writes the row index by default, so the file gets an unnamed
# leading column holding the 0..n-1 row numbers; consider index=False if no
# downstream reader relies on that column -- confirm before changing
yearlyGDPbyState.to_csv('yearlyGDPbyState.csv')