# Exploring the raw DataFrame of Natural Gas movements

The purpose is simply to understand the structure, the gaps (missing values) and the characteristics of the original DataFrame.

In [35]:
import pandas as pd # data science library to manipulate data

In [36]:
# Load the raw export; the file uses ';' as the field separator
df = pd.read_csv('GTF_export_202401.csv', sep=';')
df.info()
# 247 rows (border point / direction entries) and 188 - 4 = 184 monthly columns
# The monthly columns run from Oct 2008 to Jan 2024:
# 3 months of 2008 + 15 full years (2009-2023) + 1 month of 2024 = 184

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 247 entries, 0 to 246
Columns: 188 entries, Borderpoint to Jan-24
dtypes: float64(184), object(4)
memory usage: 362.9+ KB


In [37]:
# Keep only the rows that contain at least one missing value
df.loc[df.isna().any(axis=1)]
# TODO: decide how to fill the NaN values.
# When a single month is missing, it could be interpolated from the adjacent months

Unnamed: 0,Borderpoint,Exit,Entry,MAXFLOW (Mm3/h),Oct-08,Nov-08,Dec-08,Jan-09,Feb-09,Mar-09,...,Apr-23,May-23,Jun-23,Jul-23,Aug-23,Sep-23,Oct-23,Nov-23,Dec-23,Jan-24
2,Alveringem,Belgium,France,,0.0,0.0,0.0,0.0,0.0,0.0,...,8.0,2.0,0.0,3.0,8.0,0.0,0.0,0.0,0.0,
9,Basel,Germany,Switzerland,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0
12,Bauska,Latvia,Lithuania,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,BBL,Netherlands,United Kingdom,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,
14,BBL,United Kingdom,Netherlands,,0.0,0.0,0.0,0.0,0.0,0.0,...,430.0,461.0,211.0,280.0,220.0,143.0,7.0,27.0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,Zandvliet (H),Netherlands,Belgium,,82.0,80.0,76.0,78.0,73.0,71.0,...,,,,,,,,,,
239,Zandvliet (L),Netherlands,Belgium,,75.0,55.0,45.0,51.0,58.0,77.0,...,,,,,,,,,,
241,Zeebrugge (ZPT),Norway,Belgium,15,883.0,1050.0,1115.0,1276.0,1169.0,1184.0,...,1307.0,1264.0,1189.0,1341.0,1277.0,813.0,1324.0,1273.0,1322.0,1339.0
244,Zelzate (Zebra),Belgium,Netherlands,,134.0,111.0,110.0,131.0,113.0,113.0,...,,,,,,,,,,


In [38]:
# Count the rows (border points) for each (Exit, Entry) country pair,
# to see whether the same two countries trade through several border points
(
    df
    .groupby(by=['Exit', 'Entry'])
    .size()
)

Exit            Entry                
Albania         Italy                    1
Algeria         Spain                    1
                Tunisia                  1
Austria         Germany                  3
                Hungary                  1
                                        ..
United Kingdom  Ireland                  1
                Isle Of Man              1
                Liquefied Natural Gas    1
                Netherlands              2
                Norway                   1
Length: 144, dtype: int64

In [39]:
# Example: every flow leaving Austria. The Austria -> Germany flow shows up at
# three different border points (Oberkappel, Uberackern, Uberackern II)
df.loc[df["Exit"].eq("Austria")]

Unnamed: 0,Borderpoint,Exit,Entry,MAXFLOW (Mm3/h),Oct-08,Nov-08,Dec-08,Jan-09,Feb-09,Mar-09,...,Apr-23,May-23,Jun-23,Jul-23,Aug-23,Sep-23,Oct-23,Nov-23,Dec-23,Jan-24
10,Baumgarten,Austria,Slovak Republic,11,0.0,0.0,0.0,0.0,0.0,0.0,...,50.0,196.0,250.0,217.0,194.0,2.0,46.0,20.0,37.0,89.0
139,Mosonmagyarovar,Austria,Hungary,6,170.0,208.0,203.0,197.0,137.0,215.0,...,260.0,395.0,290.0,402.0,231.0,71.0,19.0,31.0,5.0,5.0
141,Murfeld,Austria,Slovenia,4,176.0,165.0,143.0,144.0,141.0,161.0,...,57.0,63.0,37.0,39.0,23.0,29.0,29.0,56.0,71.0,75.0
160,Oberkappel,Austria,Germany,5,8.0,9.0,15.0,20.0,16.0,16.0,...,0.0,0.0,0.0,0.0,0.0,26.0,41.0,111.0,65.0,61.0
179,Ruggel,Austria,Switzerland,1,6.0,8.0,9.0,10.0,8.0,7.0,...,5.0,3.0,2.0,2.0,2.0,2.0,4.0,6.0,7.0,9.0
199,Tarvisio,Austria,Italy,47,1615.0,1972.0,2448.0,1310.0,1565.0,1357.0,...,258.0,261.0,109.0,105.0,53.0,99.0,68.0,40.0,197.0,218.0
210,Uberackern,Austria,Germany,2,46.0,51.0,82.0,111.0,87.0,85.0,...,0.0,0.0,0.0,0.0,0.0,2.0,15.0,23.0,3.0,3.0
211,Uberackern II,Austria,Germany,9,121.0,135.0,217.0,294.0,229.0,224.0,...,0.0,0.0,0.0,0.0,0.0,1.0,1.0,3.0,0.0,6.0


In [40]:
# List (nothing is plotted here) the (Exit, Entry) country pairs that are
# connected through more than one border point.
# The group sizes are computed once and reused for both the mask and the result.
flows_per_pair = df.groupby(['Exit', 'Entry']).size()
mask = flows_per_pair > 1  # True where a country pair has several border points
flows_per_pair[mask]

Exit                   Entry                
Austria                Germany                  3
Belarus                Lithuania                2
                       Poland                   3
Belgium                France                   4
                       Germany                  3
                       Luxembourg               3
                       Netherlands              5
Bulgaria               Greece                   2
                       Romania                  2
                       Serbia                   2
Czech Republic         Germany                  4
                       Poland                   2
France                 Belgium                  2
                       Spain                    3
                       Switzerland              2
Germany                Austria                  4
                       Belgium                  2
                       Czech Republic           5
                       Netherlands              5
     