In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

---
#### Clean 2020_Table14.csv

In [2]:
df = pd.read_csv("./datasets_original/2020_Table14.csv", skipfooter=11)
df = df.drop(columns=["Textbox88", "Textbox84", "Textbox17"])
df = df.rename(columns={"Textbox41":"Model", "Textbox37":"Gross", "Code":"Net",
                        "Station":"Operator", "Textbox70":"Provider",
                        "Station1":"Name", "Textbox90":"EAF", "Textbox86":"UCF"})

# Rename in a second instance to avoid confusions with 'Code1'
df = df.rename(columns={'Code1':'Code'})

  df = pd.read_csv("./datasets_original/2020_Table14.csv", skipfooter=11)


In [3]:
df.tail(10)

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,Textbox74,Textbox94,Textbox78,EAF,UCF
428,USA,US -457,BRAIDWOOD-2,PWR,WH 4LP (DRYAMB,3645,1230,1160,EXELON,WH,1975-8,1988-5,1988-10,92.4,92.4
429,USA,US -458,RIVER BEND-1,BWR,BWR-6 (Mark 3),3091,1016,967,ENTERGY,GE,1977-3,1985-12,1986-6,84.8,84.9
430,USA,US -461,CLINTON-1,BWR,BWR-6 (Mark 3),3473,1098,1062,EXELON,GE,1975-10,1987-4,1987-11,82.2,82.2
431,USA,US -482,WOLF CREEK,PWR,WH 4LP (DRYAMB,3565,1285,1200,WCNOC,WH,1977-5,1985-6,1985-9,85.9,85.9
432,USA,US -483,CALLAWAY-1,PWR,WH 4LP (DRYAMB,3565,1275,1215,AmerenUE,WH,1975-9,1984-10,1984-12,88.4,88.4
433,USA,US -498,SOUTH TEXAS-1,PWR,WH 4LP (DRYAMB,3853,1354,1280,STP,WH,1975-12,1988-3,1988-8,84.4,84.4
434,USA,US -499,SOUTH TEXAS-2,PWR,WH 4LP (DRYAMB,3853,1354,1280,STP,WH,1975-12,1989-4,1989-6,84.3,84.3
435,USA,US -528,PALO VERDE-1,PWR,CE80 2LP (DRYA,3990,1414,1311,APS,CE,1976-5,1985-6,1986-1,82.2,82.3
436,USA,US -529,PALO VERDE-2,PWR,CE80 2LP (DRYA,3990,1414,1314,APS,CE,1976-6,1986-5,1986-9,84.5,84.5
437,USA,US -530,PALO VERDE-3,PWR,CE80 2LP (DRYA,3990,1414,1312,APS,CE,1976-6,1987-11,1988-1,86.3,86.5


Set the dates with a little parsing.
The Operation field has NaNs, so it needs a little more care.

In [4]:
df['Start_year'] = df["Textbox74"].str.split('-', expand=True)[0].astype(int)
## Trick to clean up leading zeros
df['Start_month'] = df["Textbox74"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
df['Start'] = pd.to_datetime(df['Start_year'].astype(str) + '-' + df['Start_month'].str[-2:] + '-01')

In [5]:
df['Connection_year'] = df["Textbox94"].str.split('-', expand=True)[0]
## Trick to clean up leading zeros
df['Connection_month'] = df["Textbox94"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
df['Connection'] = pd.to_datetime(df['Connection_year'] + '-' + df['Connection_month'].str[-2:] + '-01')

In [6]:
connected = ~df['Textbox78'].isna()

In [7]:
df['Operation_year'] = np.nan
df['Operation_month'] = np.nan
df['Operation'] = np.nan

In [8]:
df.loc[connected, 'Operation_year'] = df.loc[connected, "Textbox78"].str.split('-', expand=True)[0]
## Trick to clean up leading zeros
df.loc[connected, 'Operation_month'] = df.loc[connected, "Textbox78"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
df.loc[connected, 'Operation'] = pd.to_datetime(df.loc[connected, 'Operation_year'] + '-' + df.loc[connected, 'Operation_month'].str[-2:] + '-01')

In [9]:
df.drop(columns=['Operation_year', 'Operation_month',
                 'Connection_year', 'Connection_month',
                 'Start_year', 'Start_month', 'Textbox74', 'Textbox94', 'Textbox78'], inplace=True)

In [10]:
df.head()

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,EAF,UCF,Start,Connection,Operation
0,ARGENTINA,AR -1,ATUCHA-1,PHWR,PHWR KWU,1179,362,340,NASA,SIEMENS,74.5,75.5,1968-06-01,1974-03-01,1974-06-01 00:00:00
1,ARGENTINA,AR -2,EMBALSE,PHWR,CANDU 6,2064,656,608,NASA,AECL,78.6,78.9,1974-04-01,1983-04-01,1984-01-01 00:00:00
2,ARGENTINA,AR -3,ATUCHA-2,PHWR,PHWR KWU,2160,745,693,NASA,SIEMENS,53.1,53.1,1981-07-01,2014-06-01,2016-05-01 00:00:00
3,ARMENIA,AM -19,ARMENIAN-2,PWR,VVER V-270,1375,451,415,ANPPCJSC,FAEA,65.6,67.7,1975-07-01,1980-01-01,1980-05-01 00:00:00
4,BELARUS,BY -1,BELARUSIAN-1,PWR,VVER V-491,3200,1194,1110,BelNPP,JSC ASE,0.0,0.0,2013-11-01,2020-11-01,


### Concat with last 4 powerplants from Taiwan

In [11]:
dt = pd.read_csv("./datasets_original/2020_Table14.csv", skipfooter=3, skiprows=442)
dt.head(20)

  dt = pd.read_csv("./datasets_original/2020_Table14.csv", skipfooter=3, skiprows=442)


Unnamed: 0,Country2,Code2,Station2,Type2,Textbox43,ThermalPower2,Textbox39,Code3,Station3,Textbox71,Textbox75,Textbox95,Textbox79,Textbox91,Textbox87,Textbox22
0,"TAIWAN,CHINA",TW -3,KUOSHENG-1,BWR,BWR-6,2894,985,985,TPC,GE,1975-11,1981-5,1981-12,83.9,84.7,-
1,"TAIWAN,CHINA",TW -4,KUOSHENG-2,BWR,BWR-6,2894,985,985,TPC,GE,1976-3,1982-6,1983-3,83.3,84.3,-
2,"TAIWAN,CHINA",TW -5,MAANSHAN-1,PWR,WH 3LP (WE 312,2822,951,936,TPC,WH,1978-8,1984-5,1984-7,86.7,87.7,-
3,"TAIWAN,CHINA",TW -6,MAANSHAN-2,PWR,WH 3LP (WE 312,2822,951,938,TPC,WH,1979-2,1985-2,1985-5,86.2,87.4,-


In [12]:
dt = dt.rename(columns={'Country2':'Country', 'Code2':'Code', 'Station2':'Name', 'Type2':'Type',
                   'Textbox43':'Model', 'ThermalPower2':'ThermalPower', 'Textbox39':'Gross',
                   'Code3':'Net', 'Station3':'Operator','Textbox71':'Provider',
                   'Textbox91':'EAF', 'Textbox87':'UCF'})
dt.drop(columns=['Textbox22'], inplace=True)

In [13]:
dt.head()

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,Textbox75,Textbox95,Textbox79,EAF,UCF
0,"TAIWAN,CHINA",TW -3,KUOSHENG-1,BWR,BWR-6,2894,985,985,TPC,GE,1975-11,1981-5,1981-12,83.9,84.7
1,"TAIWAN,CHINA",TW -4,KUOSHENG-2,BWR,BWR-6,2894,985,985,TPC,GE,1976-3,1982-6,1983-3,83.3,84.3
2,"TAIWAN,CHINA",TW -5,MAANSHAN-1,PWR,WH 3LP (WE 312,2822,951,936,TPC,WH,1978-8,1984-5,1984-7,86.7,87.7
3,"TAIWAN,CHINA",TW -6,MAANSHAN-2,PWR,WH 3LP (WE 312,2822,951,938,TPC,WH,1979-2,1985-2,1985-5,86.2,87.4


In [14]:
dt['Start_year'] = dt["Textbox75"].str.split('-', expand=True)[0].astype(int)
## Trick to clean up leading zeros
dt['Start_month'] = dt["Textbox75"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
dt['Start'] = pd.to_datetime(dt['Start_year'].astype(str) + '-' + dt['Start_month'].str[-2:] + '-01')

In [15]:
dt['Connection_year'] = dt["Textbox95"].str.split('-', expand=True)[0]
## Trick to clean up leading zeros
dt['Connection_month'] = dt["Textbox95"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
dt['Connection'] = pd.to_datetime(dt['Connection_year'] + '-' + dt['Connection_month'].str[-2:] + '-01')

In [16]:
dt['Operation_year'] = dt["Textbox95"].str.split('-', expand=True)[0]
## Trick to clean up leading zeros
dt['Operation_month'] = dt["Textbox95"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
dt['Operation'] = pd.to_datetime(dt['Operation_year'] + '-' + dt['Operation_month'].str[-2:] + '-01')

In [17]:
dt.drop(columns=['Operation_year', 'Operation_month',
                 'Connection_year', 'Connection_month',
                 'Start_year', 'Start_month', 'Textbox75', 'Textbox95', 'Textbox79'], inplace=True)

In [18]:
dt

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,EAF,UCF,Start,Connection,Operation
0,"TAIWAN,CHINA",TW -3,KUOSHENG-1,BWR,BWR-6,2894,985,985,TPC,GE,83.9,84.7,1975-11-01,1981-05-01,1981-05-01
1,"TAIWAN,CHINA",TW -4,KUOSHENG-2,BWR,BWR-6,2894,985,985,TPC,GE,83.3,84.3,1976-03-01,1982-06-01,1982-06-01
2,"TAIWAN,CHINA",TW -5,MAANSHAN-1,PWR,WH 3LP (WE 312,2822,951,936,TPC,WH,86.7,87.7,1978-08-01,1984-05-01,1984-05-01
3,"TAIWAN,CHINA",TW -6,MAANSHAN-2,PWR,WH 3LP (WE 312,2822,951,938,TPC,WH,86.2,87.4,1979-02-01,1985-02-01,1985-02-01


In [19]:
df.columns == dt.columns

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True])

In [20]:
dr = pd.concat([df,dt], ignore_index=True)

In [21]:
dr.head()

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,EAF,UCF,Start,Connection,Operation
0,ARGENTINA,AR -1,ATUCHA-1,PHWR,PHWR KWU,1179,362,340,NASA,SIEMENS,74.5,75.5,1968-06-01,1974-03-01,1974-06-01 00:00:00
1,ARGENTINA,AR -2,EMBALSE,PHWR,CANDU 6,2064,656,608,NASA,AECL,78.6,78.9,1974-04-01,1983-04-01,1984-01-01 00:00:00
2,ARGENTINA,AR -3,ATUCHA-2,PHWR,PHWR KWU,2160,745,693,NASA,SIEMENS,53.1,53.1,1981-07-01,2014-06-01,2016-05-01 00:00:00
3,ARMENIA,AM -19,ARMENIAN-2,PWR,VVER V-270,1375,451,415,ANPPCJSC,FAEA,65.6,67.7,1975-07-01,1980-01-01,1980-05-01 00:00:00
4,BELARUS,BY -1,BELARUSIAN-1,PWR,VVER V-491,3200,1194,1110,BelNPP,JSC ASE,0.0,0.0,2013-11-01,2020-11-01,


In [22]:
dr.tail()

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,EAF,UCF,Start,Connection,Operation
437,USA,US -530,PALO VERDE-3,PWR,CE80 2LP (DRYA,3990,1414,1312,APS,CE,86.3,86.5,1976-06-01,1987-11-01,1988-01-01 00:00:00
438,"TAIWAN,CHINA",TW -3,KUOSHENG-1,BWR,BWR-6,2894,985,985,TPC,GE,83.9,84.7,1975-11-01,1981-05-01,1981-05-01 00:00:00
439,"TAIWAN,CHINA",TW -4,KUOSHENG-2,BWR,BWR-6,2894,985,985,TPC,GE,83.3,84.3,1976-03-01,1982-06-01,1982-06-01 00:00:00
440,"TAIWAN,CHINA",TW -5,MAANSHAN-1,PWR,WH 3LP (WE 312,2822,951,936,TPC,WH,86.7,87.7,1978-08-01,1984-05-01,1984-05-01 00:00:00
441,"TAIWAN,CHINA",TW -6,MAANSHAN-2,PWR,WH 3LP (WE 312,2822,951,938,TPC,WH,86.2,87.4,1979-02-01,1985-02-01,1985-02-01 00:00:00


In [23]:
dr['Shutdown'] = np.nan
dr['Construction_time'] = (dr['Connection'] - dr['Start']).dt.days

In [24]:
dr

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,EAF,UCF,Start,Connection,Operation,Shutdown,Construction_time
0,ARGENTINA,AR -1,ATUCHA-1,PHWR,PHWR KWU,1179,362,340,NASA,SIEMENS,74.5,75.5,1968-06-01,1974-03-01,1974-06-01 00:00:00,,2099
1,ARGENTINA,AR -2,EMBALSE,PHWR,CANDU 6,2064,656,608,NASA,AECL,78.6,78.9,1974-04-01,1983-04-01,1984-01-01 00:00:00,,3287
2,ARGENTINA,AR -3,ATUCHA-2,PHWR,PHWR KWU,2160,745,693,NASA,SIEMENS,53.1,53.1,1981-07-01,2014-06-01,2016-05-01 00:00:00,,12023
3,ARMENIA,AM -19,ARMENIAN-2,PWR,VVER V-270,1375,451,415,ANPPCJSC,FAEA,65.6,67.7,1975-07-01,1980-01-01,1980-05-01 00:00:00,,1645
4,BELARUS,BY -1,BELARUSIAN-1,PWR,VVER V-491,3200,1194,1110,BelNPP,JSC ASE,0.0,0.0,2013-11-01,2020-11-01,,,2557
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
437,USA,US -530,PALO VERDE-3,PWR,CE80 2LP (DRYA,3990,1414,1312,APS,CE,86.3,86.5,1976-06-01,1987-11-01,1988-01-01 00:00:00,,4170
438,"TAIWAN,CHINA",TW -3,KUOSHENG-1,BWR,BWR-6,2894,985,985,TPC,GE,83.9,84.7,1975-11-01,1981-05-01,1981-05-01 00:00:00,,2008
439,"TAIWAN,CHINA",TW -4,KUOSHENG-2,BWR,BWR-6,2894,985,985,TPC,GE,83.3,84.3,1976-03-01,1982-06-01,1982-06-01 00:00:00,,2283
440,"TAIWAN,CHINA",TW -5,MAANSHAN-1,PWR,WH 3LP (WE 312,2822,951,936,TPC,WH,86.7,87.7,1978-08-01,1984-05-01,1984-05-01 00:00:00,,2100


In [25]:
dr.to_csv('./datasets_clean/2020Table14_clean.csv', index=False)

In [27]:
dr.head(1)

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,EAF,UCF,Start,Connection,Operation,Shutdown,Construction_time
0,ARGENTINA,AR -1,ATUCHA-1,PHWR,PHWR KWU,1179,362,340,NASA,SIEMENS,74.5,75.5,1968-06-01,1974-03-01,1974-06-01 00:00:00,,2099


---
### Shutdown reactors

In [144]:
ds = pd.read_csv("./datasets_original/2020_Table16.csv", skipfooter=9)
ds.head()

  ds = pd.read_csv("./datasets_original/2020_Table16.csv", skipfooter=9)


Unnamed: 0,IsoCode,Textbox8,Textbox22,Textbox26,Textbox34,Textbox38,Textbox42,Textbox46,Textbox57,Textbox63,Textbox67,Textbox71,Textbox55
0,ARMENIA,AM -18,ARMENIAN-1,PWR,1375,408,376,ANPPCJSC,FAEA,1969-7,1976-12,1977-10,1989-2
1,BELGIUM,BE -1,BR-3,PWR,41,12,10,CEN/SCK,WH,1957-11,1962-10,1962-10,1987-6
2,BULGARIA,BG -1,KOZLODUY-1,PWR,1375,440,408,KOZNPP,AEE,1970-4,1974-7,1974-10,2002-12
3,BULGARIA,BG -2,KOZLODUY-2,PWR,1375,440,408,KOZNPP,AEE,1970-4,1975-8,1975-11,2002-12
4,BULGARIA,BG -3,KOZLODUY-3,PWR,1375,440,408,KOZNPP,AEE,1973-10,1980-12,1981-1,2006-12


In [145]:
ds.rename(columns={'IsoCode':'Country', 'Textbox8':'Code', 'Textbox22':'Name',
                   'Textbox26':'Type', 'Textbox34':'ThermalPower', 'Textbox38':'Gross', 'Textbox42':'Net',
                   'Textbox46':'Operator', 'Textbox57':'Provider',
                   'Textbox63':'St', 'Textbox67':'Co', 'Textbox71':'Op', 'Textbox55':'De'}, inplace=True)

In [146]:
ds.tail(10)

Unnamed: 0,Country,Code,Name,Type,ThermalPower,Gross,Net,Operator,Provider,St,Co,Op,De
180,USA,US -146,SAXTON,PWR,24,3,3,SNEC,GE,1960-1,1967-3,1967-3,1972-5
181,USA,US -001,SHIPPINGPORT,PWR,236,68,60,DOE DUQU,WH,1954-1,1957-12,1958-5,1982-10
182,USA,US -322,SHOREHAM,BWR,2436,849,820,LIPA,GE,1972-11,1986-8,1986-8,1989-5
183,USA,US -289,THREE MILE ISLAND-1,PWR,2568,880,819,EXELON,B&W,1968-5,1974-6,1974-9,2019-9
184,USA,US -320,THREE MILE ISLAND-2,PWR,2772,959,880,GPU,B&W,1969-11,1978-4,1978-12,1979-3
185,USA,US -344,TROJAN,PWR,3411,1155,1095,PORTGE,WH,1970-2,1975-12,1976-5,1992-11
186,USA,US -271,VERMONT YANKEE,BWR,1912,635,605,ENTERGY,GE,1967-12,1972-9,1972-11,2014-12
187,USA,US -29,YANKEE NPS,PWR,600,180,167,YAEC,WH,1957-11,1960-11,1961-7,1991-10
188,USA,US -295,ZION-1,PWR,3250,1085,1040,EXELON,WH,1968-12,1973-6,1973-12,1998-2
189,USA,US -304,ZION-2,PWR,3250,1085,1040,EXELON,WH,1968-12,1973-12,1974-9,1998-2


In [147]:
dst = pd.read_csv("./datasets_original/2020_Table16.csv", skiprows=194, skipfooter=2)
dst.rename(columns={'IsoCode2':'Country', 'Textbox10':'Code', 'Textbox24':'Name',
                   'Textbox29':'Type', 'Textbox36':'ThermalPower', 'Textbox40':'Gross', 'Textbox44':'Net',
                   'Textbox48':'Operator', 'Textbox82':'Provider',
                   'Textbox83':'St', 'Textbox84':'Co', 'Textbox85':'Op', 'Textbox86':'De'}, inplace=True)

  dst = pd.read_csv("./datasets_original/2020_Table16.csv", skiprows=194, skipfooter=2)


In [148]:
dst.head()

Unnamed: 0,Country,Code,Name,Type,ThermalPower,Gross,Net,Operator,Provider,St,Co,Op,De
0,"TAIWAN,CHINA",TW -1,CHINSHAN-1,BWR,1840,636,604,TPC,GE,1972-6,1977-11,1978-12,2018-12
1,"TAIWAN,CHINA",TW -2,CHINSHAN-2,BWR,1840,636,604,TPC,GE,1973-12,1978-12,1979-7,2019-7


In [149]:
dc = pd.concat([ds, dst], ignore_index=True)
dc.tail()

Unnamed: 0,Country,Code,Name,Type,ThermalPower,Gross,Net,Operator,Provider,St,Co,Op,De
187,USA,US -29,YANKEE NPS,PWR,600,180,167,YAEC,WH,1957-11,1960-11,1961-7,1991-10
188,USA,US -295,ZION-1,PWR,3250,1085,1040,EXELON,WH,1968-12,1973-6,1973-12,1998-2
189,USA,US -304,ZION-2,PWR,3250,1085,1040,EXELON,WH,1968-12,1973-12,1974-9,1998-2
190,"TAIWAN,CHINA",TW -1,CHINSHAN-1,BWR,1840,636,604,TPC,GE,1972-6,1977-11,1978-12,2018-12
191,"TAIWAN,CHINA",TW -2,CHINSHAN-2,BWR,1840,636,604,TPC,GE,1973-12,1978-12,1979-7,2019-7


In [150]:
dc['Start_year'] = dc["St"].str.split('-', expand=True)[0].astype(int)
## Trick to clean up leading zeros
dc['Start_month'] = dc["St"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
dc['Start'] = pd.to_datetime(dc['Start_year'].astype(str) + '-' + dc['Start_month'].str[-2:] + '-01')

In [151]:
dc['Start_year'] = dc["Co"].str.split('-', expand=True)[0].astype(int)
## Trick to clean up leading zeros
dc['Start_month'] = dc["Co"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
dc['Connection'] = pd.to_datetime(dc['Start_year'].astype(str) + '-' + dc['Start_month'].str[-2:] + '-01')

In [152]:
operative = ~dc['Op'].isna()

In [153]:
dc.loc[operative, 'Operation_year'] = dc.loc[operative, "Op"].str.split('-', expand=True)[0]
## Trick to clean up leading zeros
dc.loc[operative, 'Operation_month'] = dc.loc[operative, "Op"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
dc.loc[operative, 'Operation'] = pd.to_datetime(dc.loc[operative, 'Operation_year'] + '-' + dc.loc[connected, 'Operation_month'].str[-2:] + '-01')

In [154]:
dc['Start_year'] = dc["De"].str.split('-', expand=True)[0].astype(int)
## Trick to clean up leading zeros
dc['Start_month'] = dc["De"].str.split('-', expand=True)[1].astype(int).astype(str).str.zfill(2).astype(str)
dc['Shutdown'] = pd.to_datetime(dc['Start_year'].astype(str) + '-' + dc['Start_month'].str[-2:] + '-01')

In [155]:
dc.columns

Index(['Country', 'Code', 'Name', 'Type', 'ThermalPower', 'Gross', 'Net',
       'Operator', 'Provider', 'St', 'Co', 'Op', 'De', 'Start_year',
       'Start_month', 'Start', 'Connection', 'Operation_year',
       'Operation_month', 'Operation', 'Shutdown'],
      dtype='object')

In [156]:
dc.drop(columns=['St', 'Co','Op', 'De', 'Start_year', 'Start_month', 'Operation_year', 'Operation_month'], inplace=True)

In [157]:
dc.head()

Unnamed: 0,Country,Code,Name,Type,ThermalPower,Gross,Net,Operator,Provider,Start,Connection,Operation,Shutdown
0,ARMENIA,AM -18,ARMENIAN-1,PWR,1375,408,376,ANPPCJSC,FAEA,1969-07-01,1976-12-01,1977-10-01,1989-02-01
1,BELGIUM,BE -1,BR-3,PWR,41,12,10,CEN/SCK,WH,1957-11-01,1962-10-01,1962-10-01,1987-06-01
2,BULGARIA,BG -1,KOZLODUY-1,PWR,1375,440,408,KOZNPP,AEE,1970-04-01,1974-07-01,1974-10-01,2002-12-01
3,BULGARIA,BG -2,KOZLODUY-2,PWR,1375,440,408,KOZNPP,AEE,1970-04-01,1975-08-01,1975-11-01,2002-12-01
4,BULGARIA,BG -3,KOZLODUY-3,PWR,1375,440,408,KOZNPP,AEE,1973-10-01,1980-12-01,NaT,2006-12-01


In [159]:
dc['Model'] = np.nan
dc['EAF'] = np.nan
dc['UCF'] = np.nan

In [164]:
dr.columns

Index(['Country', 'Code', 'Name', 'Type', 'Model', 'ThermalPower', 'Gross',
       'Net', 'Operator', 'Provider', 'EAF', 'UCF', 'Start', 'Connection',
       'Operation', 'Shutdown', 'Construction_time'],
      dtype='object')

In [166]:
dc = dc[['Country', 'Code', 'Name', 'Type', 'Model', 'ThermalPower', 'Gross','Net', 'Operator', 'Provider', 'EAF', 'UCF', 'Start', 'Connection','Operation', 'Shutdown']]

In [167]:
dc['Construction_time'] = (dc['Connection'] - dc['Start']).dt.days

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc['Construction_time'] = (dc['Connection'] - dc['Start']).dt.days


In [168]:
dc

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,EAF,UCF,Start,Connection,Operation,Shutdown,Construction_time
0,ARMENIA,AM -18,ARMENIAN-1,PWR,,1375,408,376,ANPPCJSC,FAEA,,,1969-07-01,1976-12-01,1977-10-01,1989-02-01,2710
1,BELGIUM,BE -1,BR-3,PWR,,41,12,10,CEN/SCK,WH,,,1957-11-01,1962-10-01,1962-10-01,1987-06-01,1795
2,BULGARIA,BG -1,KOZLODUY-1,PWR,,1375,440,408,KOZNPP,AEE,,,1970-04-01,1974-07-01,1974-10-01,2002-12-01,1552
3,BULGARIA,BG -2,KOZLODUY-2,PWR,,1375,440,408,KOZNPP,AEE,,,1970-04-01,1975-08-01,1975-11-01,2002-12-01,1948
4,BULGARIA,BG -3,KOZLODUY-3,PWR,,1375,440,408,KOZNPP,AEE,,,1973-10-01,1980-12-01,NaT,2006-12-01,2618
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,USA,US -29,YANKEE NPS,PWR,,600,180,167,YAEC,WH,,,1957-11-01,1960-11-01,1961-07-01,1991-10-01,1096
188,USA,US -295,ZION-1,PWR,,3250,1085,1040,EXELON,WH,,,1968-12-01,1973-06-01,1973-12-01,1998-02-01,1643
189,USA,US -304,ZION-2,PWR,,3250,1085,1040,EXELON,WH,,,1968-12-01,1973-12-01,1974-09-01,1998-02-01,1826
190,"TAIWAN,CHINA",TW -1,CHINSHAN-1,BWR,,1840,636,604,TPC,GE,,,1972-06-01,1977-11-01,1978-12-01,2018-12-01,1979


In [169]:
dr['Online'] = True
dc['Online'] = False

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dc['Online'] = False


In [171]:
dall = pd.concat([dc, dr], ignore_index=True)

In [172]:
dall

Unnamed: 0,Country,Code,Name,Type,Model,ThermalPower,Gross,Net,Operator,Provider,EAF,UCF,Start,Connection,Operation,Shutdown,Construction_time,Online
0,ARMENIA,AM -18,ARMENIAN-1,PWR,,1375,408,376,ANPPCJSC,FAEA,,,1969-07-01,1976-12-01,1977-10-01 00:00:00,1989-02-01,2710,False
1,BELGIUM,BE -1,BR-3,PWR,,41,12,10,CEN/SCK,WH,,,1957-11-01,1962-10-01,1962-10-01 00:00:00,1987-06-01,1795,False
2,BULGARIA,BG -1,KOZLODUY-1,PWR,,1375,440,408,KOZNPP,AEE,,,1970-04-01,1974-07-01,1974-10-01 00:00:00,2002-12-01,1552,False
3,BULGARIA,BG -2,KOZLODUY-2,PWR,,1375,440,408,KOZNPP,AEE,,,1970-04-01,1975-08-01,1975-11-01 00:00:00,2002-12-01,1948,False
4,BULGARIA,BG -3,KOZLODUY-3,PWR,,1375,440,408,KOZNPP,AEE,,,1973-10-01,1980-12-01,NaT,2006-12-01,2618,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
629,USA,US -530,PALO VERDE-3,PWR,CE80 2LP (DRYA,3990,1414,1312,APS,CE,86.3,86.5,1976-06-01,1987-11-01,1988-01-01 00:00:00,NaT,4170,True
630,"TAIWAN,CHINA",TW -3,KUOSHENG-1,BWR,BWR-6,2894,985,985,TPC,GE,83.9,84.7,1975-11-01,1981-05-01,1981-05-01 00:00:00,NaT,2008,True
631,"TAIWAN,CHINA",TW -4,KUOSHENG-2,BWR,BWR-6,2894,985,985,TPC,GE,83.3,84.3,1976-03-01,1982-06-01,1982-06-01 00:00:00,NaT,2283,True
632,"TAIWAN,CHINA",TW -5,MAANSHAN-1,PWR,WH 3LP (WE 312,2822,951,936,TPC,WH,86.7,87.7,1978-08-01,1984-05-01,1984-05-01 00:00:00,NaT,2100,True


In [173]:
dall.to_csv('./datasets_clean/all_reactors.csv', index=False)