In [286]:
import sys

import requests
from bs4 import BeautifulSoup
import re
import unicodedata
import pandas as pd
import numpy as np

## Scraping the data for the windparks from Wikipedia , Bundesland, one after another:

### Baden-Würtemeberg:

In [287]:
def extract_column_from_header(row):
    """
    This function returns the landing status from the HTML table cell 
    Input: the  element of a table data cell extracts extra row
    """
    if (row.br):
        row.br.extract()
    if row.a:
        row.a.extract()
    if row.sup:
        row.sup.extract()
        
    colunm_name = ' '.join(row.contents)
    
    # Filter the digit and empty names
    if not(colunm_name.strip().isdigit()):
        colunm_name = colunm_name.strip()
        return colunm_name    


In [327]:
#baden-würtemberg windparks

bw_url = "https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Baden-Württemberg"


# use requests.get() method with the provided bw_url
response_bw=requests.get(bw_url).text

# assign the response to a object
# Use BeautifulSoup() to create a BeautifulSoup object from a response text content
soup_bw=BeautifulSoup(response_bw, 'html.parser')

# Use soup.title attribute
print('The title is:')
for title in soup_bw.find_all('title'):
    print(title.get_text())
    
# Use the find_all function in the BeautifulSoup object, with element type `table`
# Assign the result to a list called `html_tables`
html_tables=soup_bw.find_all('table')

#to see how many objects in the list

len(html_tables)






The title is:
Liste der Landkreise in Deutschland – Wikipedia


1

In [289]:
# Let's print the third table and check its content
table_BW = html_tables[0]
print(table_BW)  

<table class="wikitable sortable zebra tabelle-kopf-fixiert" style="font-size:95%">
<tbody><tr bgcolor="#dfdfdf">
<th width="15%">Name
</th>
<th width="6%">Baujahr
</th>
<th width="4%">Gesamt-<br/>leistung (MW)
</th>
<th width="4%">Anzahl
</th>
<th width="20%">Typ (WKA)
</th>
<th width="10%">Ort
</th>
<th width="4%">Land-<br/>kreis
</th>
<th width="7%">Koordinaten
</th>
<th width="15%">Projektierer / Betreiber
</th>
<th width="15%">Bemerkungen
</th></tr>
<tr>
<td><a href="/wiki/Fraunhofer-Institut_f%C3%BCr_Chemische_Technologie" title="Fraunhofer-Institut für Chemische Technologie">Forschungswindkraftanlage RedoxWind</a><sup class="reference" id="cite_ref-4"><a href="#cite_note-4">[4]</a></sup>
</td>
<td style="text-align:center">2017
</td>
<td style="text-align:center">2,0
</td>
<td style="text-align:center">1
</td>
<td>Qreon Q82 (1×)
</td>
<td style="text-align:center"><a href="/wiki/Karlsruhe" title="Karlsruhe">Karlsruhe</a>
</td>
<td style="text-align:center"><a href="/wiki/Karlsru

In [290]:
column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_BW.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    

In [291]:
print(column_names)

['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [292]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


In [293]:
rows=[]
table_rows = table_BW.find_all('tr')    

for row in table_rows:
    td = row.find_all('td')
    row = [row.text for row in td]
    rows.append(row)


In [294]:
rows

[[],
 ['Forschungswindkraftanlage RedoxWind[4]\n',
  '2017\n',
  '2,0\n',
  '1\n',
  'Qreon Q82 (1×)\n',
  'Karlsruhe\n',
  'KA\n',
  '49°\xa01′\xa09″\xa0N, 8°\xa031′\xa04″\xa0O\n',
  'Fraunhofer ICT\n',
  'Forschungsprojekt mit kombiniertem Batteriespeicher\n'],
 ['Windkraftanlage Alexanderschanze[5]\n',
  '2003\n',
  '1,5\n',
  '1\n',
  'Südwind S70 (1×)\n',
  'Bad Peterstal-Griesbach\n',
  'OG\n',
  '48°\xa028′\xa022″\xa0N, 8°\xa016′\xa07″\xa0O\n',
  'Schmalz, Windenergieanlage „Bei der Schanz“\n',
  'errichtet an der Schwarzwaldhochstraße im Bereich der Alexanderschanze\n'],
 ['Windkraftanlage Alpirsbach[6]\n',
  '19992013\n',
  '2,3\n',
  '1\n',
  'Enercon E-70 E4 (1×)\n',
  'Römlinsdorf\n',
  'FDS\n',
  '48°\xa020′\xa00″\xa0N, 8°\xa028′\xa034″\xa0O\n',
  'Windkraft Römlinsdorf, Stadtwerke Stuttgart\n',
  'Repowering 2013 (1× Enercon E-70 E4 statt 1× Fuhrländer FL 300/30)\n'],
 ['Windkraftanlage Auenberg[7]\n',
  '1990\n',
  '0,095\n',
  '1\n',
  'Reymo DANmark 22 (1×)\n',
  'Haus

In [295]:
len(rows)

202

In [296]:
# Example of string cleaning:

name=rows[1][0].strip()
name=re.sub(r'\[\d+\]', '', name)
name



'Forschungswindkraftanlage RedoxWind'

In [297]:
#Example of years string cleaning (not completely perfect)

years=rows[3][1].strip()

commas=[3,7,11,15, 19, 23, 27,31]
output=''.join(c + ',' * (i in commas) for i, c in enumerate(years))


output

'1999,2013,'

In [298]:
#filling the dictionary
for x in rows[1:201]:
    
    years=x[1].strip()
    years=''.join(c + ',' * (i in commas) for i, c in enumerate(years))
    table_dict['Baujahr'].append(years)
    
table_dict['Baujahr']

['2017,',
 '2003,',
 '1999,2013,',
 '1990,',
 '1992,',
 '2002,',
 '2001,',
 '2014,',
 '2020,',
 '2015,',
 '2001,',
 '2014,',
 '1998,',
 '2003,',
 '2003,',
 '2001,',
 '1993,1995,',
 '1996,',
 '1997,',
 '2005,',
 '2003,',
 '2000,',
 '2000,',
 '1999,',
 '2007,',
 '1994,1996,2015,',
 '2012,',
 '2023,',
 '2023,-202,4',
 '2023,-202,4',
 '1994,',
 '2003,',
 '1997,',
 '2002,',
 '2006,',
 '2012,',
 '2003,',
 '2020,',
 '2003,',
 '1997,1998,2013,',
 '1996,',
 '2006,',
 '2001,',
 '2011,',
 '2002,',
 '2022,',
 '1990,',
 '2005,',
 '1998,',
 '2004,',
 '2000,',
 '2024,',
 '1998,2005,',
 '2017,',
 '2004,2006,',
 '1996,',
 '1997,2003,',
 '2003,',
 '2002,2003,',
 '2005,2010,',
 '2024,',
 '2023,',
 '2002,',
 '2003,2024,',
 '1998,2000,2003,2019,',
 '2020,',
 '2001,',
 '2015,',
 '1999,',
 '2000,2002,',
 '2003,',
 '2001,2002,',
 '2008,',
 '2000,',
 '2024,',
 '2000,2003,',
 '2005,',
 '2001,',
 '2001,',
 '2016,',
 '1989,1990,',
 '2017,',
 '2011,2016,2017,',
 '2000,2011,2023,',
 '2001,2002,2003,2018,2023,-202,4

In [299]:
#I decided to leave only the first date
table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
table_dict['Baujahr']

['2017',
 '2003',
 '1999',
 '1990',
 '1992',
 '2002',
 '2001',
 '2014',
 '2020',
 '2015',
 '2001',
 '2014',
 '1998',
 '2003',
 '2003',
 '2001',
 '1993',
 '1996',
 '1997',
 '2005',
 '2003',
 '2000',
 '2000',
 '1999',
 '2007',
 '1994',
 '2012',
 '2023',
 '2023',
 '2023',
 '1994',
 '2003',
 '1997',
 '2002',
 '2006',
 '2012',
 '2003',
 '2020',
 '2003',
 '1997',
 '1996',
 '2006',
 '2001',
 '2011',
 '2002',
 '2022',
 '1990',
 '2005',
 '1998',
 '2004',
 '2000',
 '2024',
 '1998',
 '2017',
 '2004',
 '1996',
 '1997',
 '2003',
 '2002',
 '2005',
 '2024',
 '2023',
 '2002',
 '2003',
 '1998',
 '2020',
 '2001',
 '2015',
 '1999',
 '2000',
 '2003',
 '2001',
 '2008',
 '2000',
 '2024',
 '2000',
 '2005',
 '2001',
 '2001',
 '2016',
 '1989',
 '2017',
 '2011',
 '2000',
 '2001',
 '1998',
 '2017',
 '2002',
 '2000',
 '2007',
 '2019',
 '2002',
 '2017',
 '2018',
 '2001',
 '2000',
 '2004',
 '2014',
 '1997',
 '2016',
 '2017',
 '2021',
 '2016',
 '2024',
 '2016',
 '2017',
 '2000',
 '2000',
 '2016',
 '2016',
 '2000',
 

In [300]:
#filling the dictionaries:

for i in rows [1:201]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [301]:
table_dict['Name']

['Forschungswindkraftanlage RedoxWind',
 'Windkraftanlage Alexanderschanze',
 'Windkraftanlage Alpirsbach',
 'Windkraftanlage Auenberg',
 'Windkraftanlage auf dem Hohwartsattel',
 'Windkraftanlage Bad Mergentheim',
 'Windkraftanlage Binselberg',
 'Windkraftanlage Birkenzell',
 'Windkraftanlage Bopfingen',
 'Windkraftanlage Bronn',
 'Windkraftanlage Dauchingen',
 'Windkraftanlage Dettenberg',
 'Windkraftanlage Dewangen',
 'Windkraftanlage Dornhan',
 'Windkraftanlage Dornstetten',
 'Windkraftanlage Ellenberg',
 'Windkraftanlage Emmingen',
 'Windkraftanlage Fernhöhe',
 'Windkraftanlage Fischerbach',
 'Windkraftanlage Fröhnd',
 'Windkraftanlage Fürstenberg',
 'Windkraftanlage Glatten',
 'Windkraftanlage Grüner Heiner',
 'Windkraftanlage Herbolzheim',
 'Windkraftanlage Honhardt',
 'Windkraftanlage Hornisgrinde',
 'Windkraftanlage Ingersheim',
 'Windkraftanlage Kallenwald',
 'Windkraftanlagen Königseiche',
 'Windkraftanlagen Laichingen',
 'Windkraftanlage Leibertingen',
 'Windkraftanlage Len

In [302]:
df_bw= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })
df_bw.head(3)

Unnamed: 0,Name,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen
0,Forschungswindkraftanlage RedoxWind,2017,20,1,Qreon Q82 (1×),Karlsruhe,KA,"49° 1′ 9″ N, 8° 31′ 4″ O",Fraunhofer ICT,Forschungsprojekt mit kombiniertem Batteriespe...
1,Windkraftanlage Alexanderschanze,2003,15,1,Südwind S70 (1×),Bad Peterstal-Griesbach,OG,"48° 28′ 22″ N, 8° 16′ 7″ O","Schmalz, Windenergieanlage „Bei der Schanz“",errichtet an der Schwarzwaldhochstraße im Bere...
2,Windkraftanlage Alpirsbach,1999,23,1,Enercon E-70 E4 (1×),Römlinsdorf,FDS,"48° 20′ 0″ N, 8° 28′ 34″ O","Windkraft Römlinsdorf, Stadtwerke Stuttgart",Repowering 2013 (1× Enercon E-70 E4 statt 1× F...


In [303]:
df_bw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Name                      200 non-null    object
 1   Baujahr                   200 non-null    object
 2   Gesamt- leistung (MW)     200 non-null    object
 3   Anzahl                    200 non-null    object
 4   Typ (WKA)                 200 non-null    object
 5   Ort                       200 non-null    object
 6   Land- kreis               200 non-null    object
 7   Koordinaten               200 non-null    object
 8   Projektierer / Betreiber  200 non-null    object
 9   Bemerkungen               200 non-null    object
dtypes: object(10)
memory usage: 15.8+ KB


In [304]:
#we have to do it because in wikipedia park for this windpark
#the coordinates are absent
df_bw['Koordinaten'].loc[df_bw['Name'].str.contains('Windpark Hohenlochen')]= str('♁48° 17′ N, 8° 11′ O')



In [305]:
#the make to variable from the 'Koordinaten'
fn = lambda x: pd.Series([i for i in (x.split(','))])
pad = df_bw['Koordinaten'].apply(fn)

pad.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_bw['Latitude']=pad['Latitude']
df_bw['Longitude']=pad['Longitude']

In [306]:
#we drop some rows with not existing anymore windparks
df_bw.drop(df_bw.loc[df_bw['Bemerkungen'].str.contains('wegen Getriebeschaden abgebaut')].index, inplace=True)
df_bw.drop(df_bw.loc[df_bw['Bemerkungen'].str.contains('im Juli 2014 Abschaltung und Demontage aufgrund Repowering-Bonus')].index, inplace=True)
df_bw.drop(df_bw.loc[df_bw['Bemerkungen'].str.contains('Abbau vermutlich 2008/2009')].index, inplace=True)

df_bw.set_index('Name', inplace=True)
df_bw['Land']='BW'


df_bw['Anzahl'] = [int(str(i)) for i in df_bw['Anzahl']]
df_bw['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_bw['Gesamt- leistung (MW)']]

df_bw.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Forschungswindkraftanlage RedoxWind,2017,2.0,1,Qreon Q82 (1×),Karlsruhe,KA,"49° 1′ 9″ N, 8° 31′ 4″ O",Fraunhofer ICT,Forschungsprojekt mit kombiniertem Batteriespe...,49° 1′ 9″ N,8° 31′ 4″ O,BW
Windkraftanlage Alexanderschanze,2003,1.5,1,Südwind S70 (1×),Bad Peterstal-Griesbach,OG,"48° 28′ 22″ N, 8° 16′ 7″ O","Schmalz, Windenergieanlage „Bei der Schanz“",errichtet an der Schwarzwaldhochstraße im Bere...,48° 28′ 22″ N,8° 16′ 7″ O,BW
Windkraftanlage Alpirsbach,1999,2.3,1,Enercon E-70 E4 (1×),Römlinsdorf,FDS,"48° 20′ 0″ N, 8° 28′ 34″ O","Windkraft Römlinsdorf, Stadtwerke Stuttgart",Repowering 2013 (1× Enercon E-70 E4 statt 1× F...,48° 20′ 0″ N,8° 28′ 34″ O,BW


In [307]:
df_bw.info()

<class 'pandas.core.frame.DataFrame'>
Index: 197 entries, Forschungswindkraftanlage RedoxWind to Windpark Wiesensteig
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   197 non-null    object 
 1   Gesamt- leistung (MW)     197 non-null    float64
 2   Anzahl                    197 non-null    int64  
 3   Typ (WKA)                 197 non-null    object 
 4   Ort                       197 non-null    object 
 5   Land- kreis               197 non-null    object 
 6   Koordinaten               197 non-null    object 
 7   Projektierer / Betreiber  197 non-null    object 
 8   Bemerkungen               197 non-null    object 
 9   Latitude                  197 non-null    object 
 10  Longitude                 197 non-null    object 
 11  Land                      197 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 20.0+ KB


In [308]:
orts_bw=df_bw.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)

orts_bw.head(10)

#we see that some Land-Kreis are mixture of two :GPAA and HNKÜN
#if we use 'land kreis' later, we should something do with this
#to replace such values with one of the Land kreises (research in the internet)
#but if I am going to use only coordinates I can leave it as it is so far

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
TBB,137
SHA,113
AA,84
UL,46
OG,45
GP,41
MOS,39
HDH,38
GPAA,19
HNKÜN,18


In [309]:
orts_bw_MW=df_bw.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)

orts_bw_MW.head(10)


Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
SHA,350.42
TBB,284.5
AA,211.04
OG,131.885
HDH,113.43
MOS,104.66
GP,80.6
UL,68.14
HNKÜN,54.9
GPAA,52.25


In [310]:
bw=df_bw[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
bw.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Forschungswindkraftanlage RedoxWind,2017,2.0,1,Qreon Q82 (1×),Karlsruhe,KA,Fraunhofer ICT,Forschungsprojekt mit kombiniertem Batteriespe...,49° 1′ 9″ N,8° 31′ 4″ O
Windkraftanlage Alexanderschanze,2003,1.5,1,Südwind S70 (1×),Bad Peterstal-Griesbach,OG,"Schmalz, Windenergieanlage „Bei der Schanz“",errichtet an der Schwarzwaldhochstraße im Bere...,48° 28′ 22″ N,8° 16′ 7″ O
Windkraftanlage Alpirsbach,1999,2.3,1,Enercon E-70 E4 (1×),Römlinsdorf,FDS,"Windkraft Römlinsdorf, Stadtwerke Stuttgart",Repowering 2013 (1× Enercon E-70 E4 statt 1× F...,48° 20′ 0″ N,8° 28′ 34″ O


In [343]:
print(bw['Land- kreis'].unique())

dict_bw={'AAHDH':'AA',
        'RWOG':'RW',
        'HNKÜN':'HN',
        'GPAA':'GP',
        'CWFDS':'CW',
        'FRFR':'FR',
        'FREM':'FR',
        'OGRWVS':'OG',
        'TBBMOS':'TBB'}

bw['Land- kreis'].replace(dict_bw, inplace=True)
print(bw['Land- kreis'].unique())



['KA' 'OG' 'FDS' 'VS' 'FR' 'TBB' 'SHA' 'AA' 'BC' 'RW' 'CW' 'LÖ' 'S' 'EM'
 'LB' 'GP' 'UL' 'SIG' 'KÜN' 'HDH' 'HN' 'WN' 'WT' 'MOS' 'RV' 'TUT' 'RT'
 'AAHDH' 'RWOG' 'HNKÜN' 'BL' 'GPAA' 'CWFDS' 'TBBMOS' 'FRFR' 'FREM' 'PF'
 'OGRWVS' 'KN']
['KA' 'OG' 'FDS' 'VS' 'FR' 'TBB' 'SHA' 'AA' 'BC' 'RW' 'CW' 'LÖ' 'S' 'EM'
 'LB' 'GP' 'UL' 'SIG' 'KÜN' 'HDH' 'HN' 'WN' 'WT' 'MOS' 'RV' 'TUT' 'RT'
 'BL' 'PF' 'KN']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bw['Land- kreis'].replace(dict_bw, inplace=True)


In [344]:
bw.head(10)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Forschungswindkraftanlage RedoxWind,2017,2.0,1,Qreon Q82 (1×),Karlsruhe,KA,Fraunhofer ICT,Forschungsprojekt mit kombiniertem Batteriespe...,49° 1′ 9″ N,8° 31′ 4″ O
Windkraftanlage Alexanderschanze,2003,1.5,1,Südwind S70 (1×),Bad Peterstal-Griesbach,OG,"Schmalz, Windenergieanlage „Bei der Schanz“",errichtet an der Schwarzwaldhochstraße im Bere...,48° 28′ 22″ N,8° 16′ 7″ O
Windkraftanlage Alpirsbach,1999,2.3,1,Enercon E-70 E4 (1×),Römlinsdorf,FDS,"Windkraft Römlinsdorf, Stadtwerke Stuttgart",Repowering 2013 (1× Enercon E-70 E4 statt 1× F...,48° 20′ 0″ N,8° 28′ 34″ O
Windkraftanlage Auenberg,1990,0.095,1,Reymo DANmark 22 (1×),Hausen vor Wald,VS,Pionier-Windmühle Auenberg,(*); erste kommerziell betriebene Windkraftanl...,47° 53′ 19″ N,8° 28′ 24″ O
Windkraftanlage auf dem Hohwartsattel,1992,0.3,1,Enercon E-33 (1×),Breitnau,FR,"Christian Riesterer, Uwe Tischler",(*),47° 56′ 14″ N,8° 3′ 11″ O
Windkraftanlage Bad Mergentheim,2002,0.8,1,Nordex N50/800 (1×),Apfelbach,TBB,Glotter,,49° 26′ 18″ N,9° 48′ 31″ O
Windkraftanlage Binselberg,2001,1.0,1,Fuhrländer FL 1000/54 (1×),Binselberg,SHA,Windkraft Binselberg,,49° 16′ 5″ N,9° 55′ 35″ O
Windkraftanlage Birkenzell,2014,0.8,1,Enercon E-53 (1×),Birkenzell,AA,Alois Erhardt,,48° 59′ 27″ N,10° 15′ 17″ O
Windkraftanlage Bopfingen,2020,0.08,1,Enercon E-18 (1×),Bopfingen,AA,,errichtet auf 10 Meter Nabenhöhe,48° 50′ 34″ N,10° 19′ 50″ O
Windkraftanlage Bronn,2015,0.8,1,Enercon E-53 (1×),Bronn,TBB,NOW Wasserwerk Bronn,Eigenversorgung von Wasserwerk,49° 26′ 13″ N,9° 54′ 8″ O


### Bayern:

In [311]:
#Bayerische windaprks
url_bayern="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Bayern"
# use requests.get() method with the provided static_url
response_bayern=requests.get(url_bayern).text
# assign the response to a object
soup_bayern=BeautifulSoup(response_bayern, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_bayern.find_all('title'):
    print(title.get_text())

The title is:
Liste von Windkraftanlagen in Bayern – Wikipedia


In [312]:
html_tables=soup_bayern.find_all('table')
table_bayern = html_tables[0]


In [313]:
column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_bayern.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    

In [314]:
print(column_names)

['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [315]:
rows_ba=[]    
for row in table_bayern.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_ba.append(row)



In [316]:
len(rows_ba)

353

In [317]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


In [318]:
#feeling the dictionary
#I decided to leave only the first date
#filling the dictionary
for x in rows_ba[1:len(rows_ba)]:
    
    years=x[1].strip()
    table_dict['Baujahr'].append(years)

table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]

In [319]:
#filling the dictionaries:

for i in rows_ba[1:len(rows_ba)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [320]:
df_bayern= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })

df_bayern.drop(df_bayern.loc[df_bayern['Name']=='Windkraftanlage Nößling'].index, inplace=True)
df_bayern.drop(df_bayern.loc[df_bayern['Name']=='Windkraftanlage Schmidsreute'].index, inplace=True)
df_bayern.drop(df_bayern.loc[df_bayern['Name']=='Windkraftanlage Schnebes'].index, inplace=True)
df_bayern.drop(df_bayern.loc[df_bayern['Name']=='Windkraftanlage Sengenbühl'].index, inplace=True)
               
    
df_bayern.info()


#df.drop(df.loc[df['Brand Name'] == 'OTTERBOX'].index, inplace=True)

<class 'pandas.core.frame.DataFrame'>
Index: 348 entries, 0 to 351
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Name                      348 non-null    object
 1   Baujahr                   348 non-null    object
 2   Gesamt- leistung (MW)     348 non-null    object
 3   Anzahl                    348 non-null    object
 4   Typ (WKA)                 348 non-null    object
 5   Ort                       348 non-null    object
 6   Land- kreis               348 non-null    object
 7   Koordinaten               348 non-null    object
 8   Projektierer / Betreiber  348 non-null    object
 9   Bemerkungen               348 non-null    object
dtypes: object(10)
memory usage: 29.9+ KB


In [321]:
fn = lambda x: pd.Series([i for i in (x.split(','))])
pad = df_bayern['Koordinaten'].apply(fn)

pad.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_bayern['Latitude']=pad['Latitude']
df_bayern['Longitude']=pad['Longitude']

#now we have to see again that we latitide and longitude are correct
df_bayern.info()

<class 'pandas.core.frame.DataFrame'>
Index: 348 entries, 0 to 351
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Name                      348 non-null    object
 1   Baujahr                   348 non-null    object
 2   Gesamt- leistung (MW)     348 non-null    object
 3   Anzahl                    348 non-null    object
 4   Typ (WKA)                 348 non-null    object
 5   Ort                       348 non-null    object
 6   Land- kreis               348 non-null    object
 7   Koordinaten               348 non-null    object
 8   Projektierer / Betreiber  348 non-null    object
 9   Bemerkungen               348 non-null    object
 10  Latitude                  348 non-null    object
 11  Longitude                 348 non-null    object
dtypes: object(12)
memory usage: 35.3+ KB


In [247]:
df_bayern.set_index('Name', inplace=True)
df_bayern['Land']='Bayern'

df_bayern['Anzahl']=df_bayern['Anzahl'].fillna(0)
df_bayern['Anzahl'] = [int(str(i)) for i in df_bayern['Anzahl']]
df_bayern['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_bayern['Gesamt- leistung (MW)']]


df_bayern.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Breitungen/Werra,2010,0.6,1,Tacke TW 600e (1×),Breitungen-Winne,SM,"50° 45′ 22″ N, 10° 21′ 19″ O",,Sanierung 2010,50° 45′ 22″ N,10° 21′ 19″ O,Bayern
Windkraftanlage Cretzschwitz,2010,2.35,1,Enercon E-92 (1×),Cretzschwitz,G,"50° 56′ 12″ N, 12° 6′ 24″ O",GoEn Planungsgesellschaft,,50° 56′ 12″ N,12° 6′ 24″ O,Bayern
Windkraftanlage Diedorf,2010,0.6,1,Micon M 1500-600 (1×),Diedorf,UH,"51° 11′ 22″ N, 10° 17′ 32″ O",,,51° 11′ 22″ N,10° 17′ 32″ O,Bayern


In [248]:


orts_bayern=df_bayern.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_bayern

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
HO,109
SÖM,103
UH,85
WAK,83
KYF,83
...,...
FO,1
WM,1
NU,1
FRG,1


In [249]:
orts_bayern_MW=df_bayern.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_bayern_MW




Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
SÖM,270.950
HO,242.700
UH,208.850
KYF,180.650
AN,171.680
...,...
WM,0.600
FRG,0.200
SÖ,0.075
REG,0.050


In [324]:
bayern=df_bayern[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
bayern.head(3)

Unnamed: 0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
0,2009,20,1,Vestas V90-2.0MW (1×),Waller,LAU,"ATM Hartmann, SRE Sachs",,49° 25′ 26″ N,11° 29′ 21″ O
1,2002,6,1,Enercon E-40/6.44 (1×),Göggelsbuch,RH,,,49° 12′ 56″ N,11° 12′ 14″ O
2,2013,18,1,Vestas V100-1.8MW (1×),Altmannstein,EI,Windkraft Kernl,,48° 56′ 25″ N,11° 34′ 54″ O


In [382]:
print(bayern['Land- kreis'].unique())

dict_ba={'LAUNM':'LAU',
        'ANNEA':'AN',
        'LIFKU':'LIF',
        'BTKU':'BT',
        'EIRH':'EI',
        'ANWUG':'AN',
        'EIWUG':'EI',
        'ASNEW':'AS',
        'MSPSW':'MSP',
        'KCKU':'KC',
        'MSPWÜ':'MSP',
        'AN\nAN':'AN',
        'BALIF':'BA',
        'OALOA':'OAL',
        'NEAERH':'NEA',
        'GZA':'GZ',
        'EG':'PAN',
        'HOBT':'HO'}

bayern['Land- kreis'].replace(dict_ba, inplace=True)
print(bayern['Land- kreis'].unique())



['LAU' 'RH' 'EI' 'ND' 'AN' 'OAL' 'NM' 'R' 'WÜ' 'MN' 'REG' 'KU' 'KT' 'MSP'
 'DAH' 'BT' 'M' 'FS' 'PAN' 'AS' 'PAF' 'DLG' 'FÜ' 'OA' 'SW' 'LA' 'FRG'
 'CHA' 'FO' 'SR' 'KC' 'NEA' 'FFB' 'SAD' 'WUG' 'ERH' 'TIR' 'EBE' 'WM' 'KEH'
 'RO' 'BA' 'TS' 'NU' 'HO' 'PA' 'WUN' 'DON' 'AIC' 'HAS' 'LL' 'DGF' 'A'
 'MIL' 'NEW' 'HOBT' 'KG' 'NES' 'LIF' 'CO' 'MB' 'GZ' 'STA']
['LAU' 'RH' 'EI' 'ND' 'AN' 'OAL' 'NM' 'R' 'WÜ' 'MN' 'REG' 'KU' 'KT' 'MSP'
 'DAH' 'BT' 'M' 'FS' 'PAN' 'AS' 'PAF' 'DLG' 'FÜ' 'OA' 'SW' 'LA' 'FRG'
 'CHA' 'FO' 'SR' 'KC' 'NEA' 'FFB' 'SAD' 'WUG' 'ERH' 'TIR' 'EBE' 'WM' 'KEH'
 'RO' 'BA' 'TS' 'NU' 'HO' 'PA' 'WUN' 'DON' 'AIC' 'HAS' 'LL' 'DGF' 'A'
 'MIL' 'NEW' 'KG' 'NES' 'LIF' 'CO' 'MB' 'GZ' 'STA']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  bayern['Land- kreis'].replace(dict_ba, inplace=True)


### Berlin and Brandenburg:

In [136]:
url_be_ba="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Berlin_und_Brandenburg"
# use requests.get() method with the provided static_url
response_be_ba=requests.get(url_be_ba).text
# assign the response to a object
soup_be_ba=BeautifulSoup(response_be_ba, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_be_ba.find_all('title'):
    print(title.get_text())
    
html_tables=soup_be_ba.find_all('table')
table_be_ba = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_be_ba.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen in Berlin und Brandenburg – Wikipedia
['Name', 'Baujahr', 'Gesamt-  Leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land-  kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [137]:
rows_be_ba=[]    
for row in table_be_ba.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_be_ba.append(row)
        
        
print(len(rows_be_ba))


304


In [138]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


In [139]:


#filling the dictionaries:



for i in rows_be_ba[1:len(rows_be_ba)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [144]:
df_berlin_br= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })

#df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Bemerkungen'].str.contains('abgebaut')].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlage Althüttendorf'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlage Groß Leppin'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlage Nettelbeck'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlagen Beveringen-Nord'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlagen Drewen'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlage Hohenseefeld'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlagen Horstfelde'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlagen Niemerlang'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windpark Hasenwinkel'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlagen Rüdersdorf'].index, inplace=True)
df_berlin_br.drop(df_berlin_br.loc[df_berlin_br['Name']=='Windkraftanlagen Stöffin'].index, inplace=True)

df_berlin_br.set_index('Name', inplace=True)
df_berlin_br['Land']='Br-Ber'


#splitting the coordinated variable to two
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_berlin_br['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_berlin_br['Latitude']=coord['Latitude']
df_berlin_br['Longitude']=coord['Longitude']






#df_berlin_br['Anzahl']=df_berlin_br['Anzahl'].fillna(0)
df_berlin_br['Anzahl'] = [int(str(i)) for i in df_berlin_br['Anzahl']]
df_berlin_br['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_berlin_br['Gesamt- leistung (MW)']]

df_berlin_br.head(3)
#scraping had some strange results: Gesamt_Leistung(MW) and Landkreis are duplicated (and one of the columns 
#contains NaNs
#we remove them)

Unnamed: 0_level_0,Baujahr,Gesamt- Leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Gesamt- leistung (MW),Land- kreis,Land,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
I. Windkraft­anlage Pankow,2010,,1,Enercon E-82 (1×),Buch,,"52° 38′ 10″ N, 13° 25′ 47″ O",NEB,bis Ende 2014 einzige Windkraftanlage in Berli...,2.0,B,Br-Ber,52° 38′ 10″ N,13° 25′ 47″ O
II. und VI. Wind­kraft­anlage Pankow,2010,,2,Enercon E-82 E2 (1×) Enercon E-138 EP3 E2 (1×),Stadtrand­siedlung Malchow,,"52° 35′ 29″ N, 13° 30′ 4″ O",NEB,,6.5,B,Br-Ber,52° 35′ 29″ N,13° 30′ 4″ O
III. und IV. Wind­kraft­anlage Pankow,2010,,2,Enercon E-92 (2×),Französisch Buchholz,,"52° 37′ 28″ N, 13° 26′ 23″ O",Phase I GmbH & Co Windkraft Pankow,auf dem Gelände von Heron Fireworks,4.7,B,Br-Ber,52° 37′ 28″ N,13° 26′ 23″ O


In [145]:
df_berlin_br= df_berlin_br.drop(df_berlin_br.columns[1], axis=1)
df_berlin_br= df_berlin_br.drop(df_berlin_br.columns[4], axis=1)

df_berlin_br.info()

<class 'pandas.core.frame.DataFrame'>
Index: 292 entries, I. Windkraft­anlage Pankow to Windpark Zossen
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   292 non-null    object 
 1   Anzahl                    292 non-null    int64  
 2   Typ (WKA)                 292 non-null    object 
 3   Ort                       292 non-null    object 
 4   Koordinaten               292 non-null    object 
 5   Projektierer / Betreiber  292 non-null    object 
 6   Bemerkungen               292 non-null    object 
 7   Gesamt- leistung (MW)     292 non-null    float64
 8   Land- kreis               292 non-null    object 
 9   Land                      292 non-null    object 
 10  Latitude                  292 non-null    object 
 11  Longitude                 292 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 29.7+ KB


In [147]:


orts_berlin_br=df_berlin_br.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_berlin_br.head(5)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
UM,667
PR,577
OPR,315
EE,303
MOL,294


In [148]:
orts_berlin_br_MW=df_berlin_br.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)

orts_berlin_br_MW

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
UM,1666.85
PR,1240.7
MOL,707.08
LDS,616.4
EE,613.5
TF,530.35
OPR,497.17
LOS,466.17
HVL,361.5
BAR,352.35


In [251]:
br=df_berlin_br[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
br.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
I. Windkraft­anlage Pankow,2010,2.0,1,Enercon E-82 (1×),Buch,B,NEB,bis Ende 2014 einzige Windkraftanlage in Berli...,52° 38′ 10″ N,13° 25′ 47″ O
II. und VI. Wind­kraft­anlage Pankow,2010,6.5,2,Enercon E-82 E2 (1×) Enercon E-138 EP3 E2 (1×),Stadtrand­siedlung Malchow,B,NEB,,52° 35′ 29″ N,13° 30′ 4″ O
III. und IV. Wind­kraft­anlage Pankow,2010,4.7,2,Enercon E-92 (2×),Französisch Buchholz,B,Phase I GmbH & Co Windkraft Pankow,auf dem Gelände von Heron Fireworks,52° 37′ 28″ N,13° 26′ 23″ O


In [348]:
print(br['Land- kreis'].unique())

dict_br={'TF PM':'LAU',
        'PM TF':'AN',
        'TF LDS':'LIF',
        'BAR MOL':'BT',
        'OSL EE':'EI',
        'OPR PR':'AN',
        'BAR UM':'EI',
        'SPN OSL':'AS',
        'LDS LOS':'MSP',
        'OSL SPN':'KC'}

br['Land- kreis'].replace(dict_br, inplace=True)
print(br['Land- kreis'].unique())

['B' 'OPR' 'HVL' 'SPN' 'FF' 'MOL' 'OSL' 'PR' 'TF' 'UM' 'LOS' 'PM' 'LDS'
 'BAR' 'EE' 'BRB' 'TF PM' 'OHV' 'CB' 'TF LDS' 'PM TF' 'BAR MOL' 'OSL EE'
 'OPR PR' 'BAR UM' 'SPN OSL' 'LDS LOS' 'OSL SPN']
['B' 'OPR' 'HVL' 'SPN' 'FF' 'MOL' 'OSL' 'PR' 'TF' 'UM' 'LOS' 'PM' 'LDS'
 'BAR' 'EE' 'BRB' 'LAU' 'OHV' 'CB' 'LIF' 'AN' 'BT' 'EI' 'AS' 'MSP' 'KC']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  br['Land- kreis'].replace(dict_br, inplace=True)


### Bremen, Hamburg, Niedersachsen:


In [149]:
url_nord="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Bremen,_Hamburg_und_Niedersachsen"
# use requests.get() method with the provided static_url
response_nord=requests.get(url_nord).text
# assign the response to a object
soup_nord=BeautifulSoup(response_nord, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_nord.find_all('title'):
    print(title.get_text())
    
html_tables=soup_nord.find_all('table')
table_nord = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_nord.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen in Bremen, Hamburg und Niedersachsen – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [150]:
rows_nord=[]    
for row in table_nord.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_nord.append(row)
        
        
print(len(rows_nord))


1001


In [151]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:


for i in rows_nord[1:len(rows_nord)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [152]:
df_nord= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })

#deleting not existing anymore windparks:
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Abbehausergroden'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Ahndeich'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Apelnstedt'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Auestade'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlagen Aurich-Extum'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Aurich-Haxtum'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlagen Aurich-Tannenhausen'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Aurich-Walle'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Bad Harzburg'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Bienerfeld'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Braunschweig-Veltenhof'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Bulsterdeich'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlagen Clausthal-Zellerfeld'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Flessenmoor'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Geeste'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Gräpel'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Gut Wormsthal'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Hachum'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Heisterberg'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Hemmoor'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Hilter'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Hooksiel'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Hütten'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Langenhagen-Godshorn'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Mainzholzen'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Meinersen'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Neuschoo'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Nienbrügge'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Nikolausdorf'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Norden'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlagen Nordwohlde'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Obersburg'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Ochsenwerder'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Pollhagen'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Sandkrug-Klärwerk'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Schümersdamm'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Seevernserwisch'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Springe'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Stührenberg'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windkraftanlage Tarbarg'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windpark Fledderweg'].index, inplace=True)
df_nord.drop(df_nord.loc[df_nord['Name']=='Windpark Neu Wulmstorf'].index, inplace=True)


df_nord.set_index('Name', inplace=True)
df_nord['Land']='HH,HB,H'



fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_nord['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_nord['Latitude']=coord['Latitude']
df_nord['Longitude']=coord['Longitude']


df_nord['Anzahl'] = [int(str(i)) for i in df_nord['Anzahl']]
df_nord['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_nord['Gesamt- leistung (MW)']]



df_nord.head(3)


Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Land,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlagen Afferde,2010,6.6,2,Vestas V126-3.3MW (1×)Nordex N131/3300 (1×),Afferde,HM,"52° 6′ 11″ N, 9° 25′ 28″ O",Landwind,,"HH,HB,H",52° 6′ 11″ N,9° 25′ 28″ O
Windkraftanlagen Ahlshausen,2010,1.4,2,Tacke TW 600 (1×)Enercon E-53 (1×),Ahlshausen,NOM,"51° 47′ 23″ N, 9° 59′ 6″ O",,2011 Repowering (1× Enercon E-53 statt 1× Ener...,"HH,HB,H",51° 47′ 23″ N,9° 59′ 6″ O
Windkraftanlage Aldorf,2010,3.3,1,Vestas V112-3.3MW (1×),Aldorf,DH,"52° 15′ 6″ N, 8° 29′ 31″ O",,,"HH,HB,H",52° 15′ 6″ N,8° 29′ 31″ O


In [153]:
df_nord.info()

<class 'pandas.core.frame.DataFrame'>
Index: 958 entries, Windkraftanlagen Afferde to Windpark Bad Zwischenahn
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   958 non-null    object 
 1   Gesamt- leistung (MW)     958 non-null    float64
 2   Anzahl                    958 non-null    int64  
 3   Typ (WKA)                 958 non-null    object 
 4   Ort                       958 non-null    object 
 5   Land- kreis               958 non-null    object 
 6   Koordinaten               958 non-null    object 
 7   Projektierer / Betreiber  958 non-null    object 
 8   Bemerkungen               958 non-null    object 
 9   Land                      958 non-null    object 
 10  Latitude                  958 non-null    object 
 11  Longitude                 958 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 97.3+ KB


In [154]:

orts_nord=df_nord.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_nord

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
CUX,516
EL,499
AUR,487
DH,407
CLP,233
...,...
DEL,5
WOB,5
BS,5
RI,1


In [155]:

orts_nord_MW=df_nord.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_nord_MW

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
EL,1253.450
DH,905.635
AUR,868.455
CUX,802.075
STD,557.565
...,...
DEL,6.000
HSHG,5.000
WOB,3.400
BH,0.150


In [252]:
hbh=df_nord[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
hbh.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlagen Afferde,2010,6.6,2,Vestas V126-3.3MW (1×)Nordex N131/3300 (1×),Afferde,HM,Landwind,,52° 6′ 11″ N,9° 25′ 28″ O
Windkraftanlagen Ahlshausen,2010,1.4,2,Tacke TW 600 (1×)Enercon E-53 (1×),Ahlshausen,NOM,,2011 Repowering (1× Enercon E-53 statt 1× Ener...,51° 47′ 23″ N,9° 59′ 6″ O
Windkraftanlage Aldorf,2010,3.3,1,Vestas V112-3.3MW (1×),Aldorf,DH,,,52° 15′ 6″ N,8° 29′ 31″ O


In [402]:
print(hbh['Land- kreis'].unique())

dict_hbh={'ROWSTD':'ROW',
        'HPE':'H',
        'OSVEC':'OS',
        'PESZHI':'PE',
        'WTMFRI':'WTM',
        'HSHG':'H',
        'OLCLP':'OL',
        'LERWST':'LER',
        'HEMF':'HE',
        'WFSZ':'WF',
        'WTMAUR':'WTM',
        'SHGH':'SHG',
         'HHI':'H',
         'VERROW':'VER',
         'BRAOL':'BRA',
         'SZPE':'SZ',
         'OHZROW':'OHZ',
          'GFWOB':'GF',
         'RI':'SHG',
         'HEWF':'HE',
         'BH':'HB',
         'GIF':'GF'}




hbh['Land- kreis'].replace(dict_hbh, inplace=True)
print(hbh['Land- kreis'].unique())

['HM' 'NOM' 'DH' 'OL' 'BRA' 'HH' 'OS' 'EL' 'WTM' 'STD' 'AUR' 'NOH' 'LG'
 'EMD' 'CUX' 'LER' 'HI' 'HB' 'ROW' 'WF' 'NI' 'SHG' 'CLP' 'FRI' 'DEL' 'WL'
 'H' 'HK' 'WHV' 'GÖ' 'HE' 'PE' 'UE' 'OHZ' 'CE' 'VER' 'DAN' 'VEC' 'HOL'
 'SZ' 'GF' 'GS' 'GIF' 'WST' 'BS' 'WOB']
['HM' 'NOM' 'DH' 'OL' 'BRA' 'HH' 'OS' 'EL' 'WTM' 'STD' 'AUR' 'NOH' 'LG'
 'EMD' 'CUX' 'LER' 'HI' 'HB' 'ROW' 'WF' 'NI' 'SHG' 'CLP' 'FRI' 'DEL' 'WL'
 'H' 'HK' 'WHV' 'GÖ' 'HE' 'PE' 'UE' 'OHZ' 'CE' 'VER' 'DAN' 'VEC' 'HOL'
 'SZ' 'GF' 'GS' 'WST' 'BS' 'WOB']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hbh['Land- kreis'].replace(dict_hbh, inplace=True)


### Hessen:

In [156]:
url_he="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Hessen"
# use requests.get() method with the provided static_url
response_he=requests.get(url_he).text
# assign the response to a object
soup_he=BeautifulSoup(response_he, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_he.find_all('title'):
    print(title.get_text())
    
html_tables=soup_he.find_all('table')
table_he = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_he.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen in Hessen – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [157]:
rows_he=[]    
for row in table_he.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_he.append(row)
        
        
print(len(rows_he))


222


In [158]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:

for i in rows_he[1:len(rows_he)]:
    years=x[1].strip()
    
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]

for i in rows_he[1:len(rows_he)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
  
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [159]:
df_he= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })

#split the 'Koordinaten':
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_he['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_he['Latitude']=coord['Latitude']
df_he['Longitude']=coord['Longitude']

#remove not existing wiondparks
df_he.drop(df_he.loc[df_he['Name']=='Windkraftanlage Aarbergen'].index, inplace=True)
df_he.drop(df_he.loc[df_he['Name']=='Windkraftanlagen Zorn'].index, inplace=True)
df_he.drop(df_he.loc[df_he['Name']=='Windpark Alte Schanze'].index, inplace=True)
df_he.drop(df_he.loc[df_he['Name']=='Windpark Hohenzell'].index, inplace=True)
df_he.drop(df_he.loc[df_he['Name']=='Windpark Windhausen-Nord'].index, inplace=True)


df_he.set_index('Name', inplace=True)
df_he['Land']='Hessen'


df_he['Anzahl'] = [int(str(i).replace("[58]", "")) for i in df_he['Anzahl']]
df_he['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_he['Gesamt- leistung (MW)']]



df_he.head(3)


Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Breidenbach (Windpark Hesselbach),2010,3.3,1,Vestas V126-3.3MW (1×),Breidenbach,MR,"50° 53′ 38″ N, 8° 23′ 53″ O",Boxbach Energy,zugehörig zum WP Hesselbach auf nordrhein-west...,50° 53′ 38″ N,8° 23′ 53″ O,Hessen
Windkraftanlage Friedewald,2010,0.6,1,Enercon E-40/6.44 (1×),Friedewald,HEF,"50° 53′ 0″ N, 9° 50′ 10″ O",Windenergie Friedewald,Errichtet an der A 4,50° 53′ 0″ N,9° 50′ 10″ O,Hessen
Windkraftanlage Heitzelberg,2010,5.5,1,GE Wind Energy 5.5-158 (1×),Freienhagen,KB,"50° 15′ 32″ N, 9° 4′ 2″ O",PNE,,50° 15′ 32″ N,9° 4′ 2″ O,Hessen


In [160]:
df_he.info()

<class 'pandas.core.frame.DataFrame'>
Index: 216 entries, Windkraftanlage Breidenbach (Windpark Hesselbach) to Windpark Wirmighausen
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   216 non-null    object 
 1   Gesamt- leistung (MW)     216 non-null    float64
 2   Anzahl                    216 non-null    int64  
 3   Typ (WKA)                 216 non-null    object 
 4   Ort                       216 non-null    object 
 5   Land- kreis               216 non-null    object 
 6   Koordinaten               216 non-null    object 
 7   Projektierer / Betreiber  216 non-null    object 
 8   Bemerkungen               216 non-null    object 
 9   Latitude                  216 non-null    object 
 10  Longitude                 216 non-null    object 
 11  Land                      216 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 21.9+ KB


In [161]:


orts_he=df_he.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_he.head(5)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
VB,229
KB,161
KS,139
MKK,109
MR,91


In [162]:
orts_he_MW=df_he.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_he_MW.head(5)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
VB,457.8
KB,309.12
MKK,287.95
HEF,253.5
KS,224.4


In [253]:
he=df_he[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
he.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Breidenbach (Windpark Hesselbach),2010,3.3,1,Vestas V126-3.3MW (1×),Breidenbach,MR,Boxbach Energy,zugehörig zum WP Hesselbach auf nordrhein-west...,50° 53′ 38″ N,8° 23′ 53″ O
Windkraftanlage Friedewald,2010,0.6,1,Enercon E-40/6.44 (1×),Friedewald,HEF,Windenergie Friedewald,Errichtet an der A 4,50° 53′ 0″ N,9° 50′ 10″ O
Windkraftanlage Heitzelberg,2010,5.5,1,GE Wind Energy 5.5-158 (1×),Freienhagen,KB,PNE,,50° 15′ 32″ N,9° 4′ 2″ O


In [352]:
print(he['Land- kreis'].unique())

dict_he={'LDKLM':'LDK',
        'LDKMR':'LDK',
        'VBMKK':'VB',
        'HEFVB':'HEF',
        'MRGI':'MR'}

he['Land- kreis'].replace(dict_he, inplace=True)
print(he['Land- kreis'].unique())

['MR' 'HEF' 'KB' 'HR' 'FD' 'LM' 'MKK' 'LDK' 'VB' 'KS' 'ESW' 'FB' 'RÜD'
 'DA' 'LDKLM' 'LDKMR' 'ERB' 'VBMKK' 'HG' 'HEFVB' 'HP' 'GI' 'MRGI']
['MR' 'HEF' 'KB' 'HR' 'FD' 'LM' 'MKK' 'LDK' 'VB' 'KS' 'ESW' 'FB' 'RÜD'
 'DA' 'ERB' 'HG' 'HP' 'GI']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  he['Land- kreis'].replace(dict_he, inplace=True)


### Mecklenburg-Vorpommen:

In [163]:
url_mp="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Mecklenburg-Vorpommern"
# use requests.get() method with the provided static_url
response_mp=requests.get(url_mp).text
# assign the response to a object
soup_mp=BeautifulSoup(response_mp, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_mp.find_all('title'):
    print(title.get_text())
    
html_tables=soup_mp.find_all('table')
table_mp = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_mp.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen in Mecklenburg-Vorpommern – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [164]:
rows_mp=[]    
for row in table_mp.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_mp.append(row)
        
        
print(len(rows_mp))


239


In [165]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:



for i in rows_mp[1:len(rows_mp)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [166]:
df_mp= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })


#split the coordinates:
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_mp['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_mp['Latitude']=coord['Latitude']
df_mp['Longitude']=coord['Longitude']

#deleting not existing anymore windparks:
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Burow'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Graal-Müritz'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Hafthagen'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Hessenburg'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Langsdorf'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Mattchow'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Michaelsdorf'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Michaelsdorf-Süd'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Mühlengeez'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Neuenlübke'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Penzin'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Rerik'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Schwarbe-Nord'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Schwerin'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Thomashof'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlage Zarnekow'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlagen Hägerfelde'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlagen Poseritz'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlagen Russow'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlagen Schaprode'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlagen Tempel'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlagen Starrvitz'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windkraftanlagen Teßmannsdorf'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='WWindkraftanlagen Trent'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windpark Bodstedt'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windpark Gagzow'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windpark Holstenhagen'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windpark Klein Welzin'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windpark Pragsdorf'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windpark Sagard'].index, inplace=True)
df_mp.drop(df_mp.loc[df_mp['Name']=='Windpark Schwarbe'].index, inplace=True)





df_mp.set_index('Name', inplace=True)
df_mp['Land']='MVP'

df_mp['Anzahl'] = [int(str(i)) for i in df_mp['Anzahl']]
df_mp['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_mp['Gesamt- leistung (MW)']]


df_mp.head(3)


Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Altenkirchen,2010,0.5,1,Enercon E-40/5.40 (1×),Altenkirchen,VR,"54° 37′ 42″ N, 13° 19′ 55″ O",,,54° 37′ 42″ N,13° 19′ 55″ O,MVP
Windkraftanlage Borg,2010,2.0,1,Enercon E-70 (1×),Ribnitz-Damgarten,VR,"54° 14′ 11″ N, 12° 23′ 15″ O",WIND-Projekt,früher noch 1× Lagerwey LW15/75,54° 14′ 11″ N,12° 23′ 15″ O,MVP
Windkraftanlage Breitling,2010,2.5,1,Nordex N90/2500 (1×),Peez,HRO,"54° 9′ 25″ N, 12° 7′ 33″ O","WIND-projekt, Nordex",Testanlage zur Simulation eines Standortes unt...,54° 9′ 25″ N,12° 7′ 33″ O,MVP


In [167]:
df_mp.info()

<class 'pandas.core.frame.DataFrame'>
Index: 208 entries, Windkraftanlage Altenkirchen to Windpark Züsedom-Fahrenwalde
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   208 non-null    object 
 1   Gesamt- leistung (MW)     208 non-null    float64
 2   Anzahl                    208 non-null    int64  
 3   Typ (WKA)                 208 non-null    object 
 4   Ort                       208 non-null    object 
 5   Land- kreis               208 non-null    object 
 6   Koordinaten               208 non-null    object 
 7   Projektierer / Betreiber  208 non-null    object 
 8   Bemerkungen               208 non-null    object 
 9   Latitude                  208 non-null    object 
 10  Longitude                 208 non-null    object 
 11  Land                      208 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 21.1+ KB


In [168]:
orts_mp_MW=df_mp.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_mp_MW

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
LUP,830.98
LRO,787.16
VG,707.72
MSE,705.26
VR,456.635
NWM,337.655
VGVR,61.57
HRO,8.4


In [169]:
orts_mp=df_mp.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_mp

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
VG,389
LUP,361
LRO,342
MSE,317
VR,279
NWM,172
VGVR,26
HRO,9


In [254]:
mvp=df_mp[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
mvp.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Altenkirchen,2010,0.5,1,Enercon E-40/5.40 (1×),Altenkirchen,VR,,,54° 37′ 42″ N,13° 19′ 55″ O
Windkraftanlage Borg,2010,2.0,1,Enercon E-70 (1×),Ribnitz-Damgarten,VR,WIND-Projekt,früher noch 1× Lagerwey LW15/75,54° 14′ 11″ N,12° 23′ 15″ O
Windkraftanlage Breitling,2010,2.5,1,Nordex N90/2500 (1×),Peez,HRO,"WIND-projekt, Nordex",Testanlage zur Simulation eines Standortes unt...,54° 9′ 25″ N,12° 7′ 33″ O


In [394]:
print(mvp['Land- kreis'].unique())

dict_mvp={'VGVR':'VG',
         'LRO':'HRO'}

mvp['Land- kreis'].replace(dict_mvp, inplace=True)
print(mvp['Land- kreis'].unique())

['VR' 'HRO' 'LRO' 'VG' 'LUP' 'NWM' 'MSE']
['VR' 'HRO' 'VG' 'LUP' 'NWM' 'MSE']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mvp['Land- kreis'].replace(dict_mvp, inplace=True)


### Nordrhein-Westfalien:

In [262]:
url_nw="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Nordrhein-Westfalen"
# use requests.get() method with the provided static_url
response_nw=requests.get(url_nw).text
# assign the response to a object
soup_nw=BeautifulSoup(response_nw, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_nw.find_all('title'):
    print(title.get_text())
    
html_tables=soup_nw.find_all('table')
table_nw = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_nw.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen in Nordrhein-Westfalen – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [263]:
rows_nw=[]    
for row in table_nw.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_nw.append(row)
        
        
print(len(rows_nw))


815


In [264]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:



for i in rows_nw[1:len(rows_nw)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    
    
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    
    
    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [274]:
df_nw= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })

#splitting the coordinates:
fn = lambda x: pd.Series([i for i in (x.split('N,'))])
coord = df_nw['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_nw['Latitude']=coord['Latitude']
df_nw['Longitude']=coord['Longitude']



##remove not existing wiondparks
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Aachen-Melaten'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Bedburg-Hau'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Budberg'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlagen Buke'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlagen Gangelt'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Geisecke'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Geldern-Walbeck'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Gütersloh-Hollen'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Isselhorst'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Kaldenkirchen'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Mettingen'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Minden-Päpinghausen'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlagen Obringhausen'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Oer-Erkenschwick „Owi“'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Oppenwehe'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Ortfeld'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlagen Obringhausen'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlagen Vreden Süd'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Setterich'].index, inplace=True)
df_nw.drop(df_nw.loc[df_nw['Name']=='Windkraftanlage Selkentrop'].index, inplace=True)






df_nw.set_index('Name', inplace=True)
df_nw['Land']='NW'
#df_nw['Anzahl']=df_nw['Anzahl'].fillna(0)
df_nw['Anzahl'] = [int(str(i)) for i in df_nw['Anzahl']]
df_nw['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_nw['Gesamt- leistung (MW)']]


df_nw.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Aachen-Orsbach,2010,0.08,1,Enercon E-18 (1×),Orsbach,AC,"50,790556° N, 6,009° O",Wind e.V. Aachen,,"50,790556°","6,009° O",NW
Windkraftanlage Affeln,2010,0.6,1,Tacke TW 600 (1×),Affeln,MK,"51,27842° N, 7,89391° O",Windinvestor,,"51,27842°","7,89391° O",NW
Windkraftanlagen Ahlen-Gemmerich,2010,8.4,2,Enercon E-138 EP3 E2 (2×),Gemmerich,WAF,"51,73053° N, 7,884509° O",Vento Ludens GmbH ＆Co.KG,in Bau,"51,73053°","7,884509° O",NW


In [280]:
df_nw['Latitude']=df_nw['Latitude'].apply(lambda x: x.replace(',', '.'))
df_nw['Longitude']=df_nw['Longitude'].apply(lambda x: x.replace(',', '.'))

In [275]:
df_nw.info()

<class 'pandas.core.frame.DataFrame'>
Index: 795 entries, Windkraftanlage Aachen-Orsbach to Windpark Zülpich
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   795 non-null    object 
 1   Gesamt- leistung (MW)     795 non-null    float64
 2   Anzahl                    795 non-null    int64  
 3   Typ (WKA)                 795 non-null    object 
 4   Ort                       795 non-null    object 
 5   Land- kreis               795 non-null    object 
 6   Koordinaten               795 non-null    object 
 7   Projektierer / Betreiber  795 non-null    object 
 8   Bemerkungen               795 non-null    object 
 9   Latitude                  795 non-null    object 
 10  Longitude                 795 non-null    object 
 11  Land                      795 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 80.7+ KB


In [276]:
orts_nw_MW=df_nw.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_nw_MW.head(10)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
PB,1358.505
BOR,652.48
ST,630.875
DN,481.64
WAF,412.37
SO,408.01
KLE,406.73
HSK,329.762
HX,317.52
COE,292.47


In [277]:
orts_nw=df_nw.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_nw.head(10)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
PB,544
ST,286
BOR,284
SO,279
WAF,211
KLE,206
DN,184
HX,176
LIP,143
EU,124


In [281]:
nw=df_nw[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
nw.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Aachen-Orsbach,2010,0.08,1,Enercon E-18 (1×),Orsbach,AC,Wind e.V. Aachen,,50.790556°,6.009° O
Windkraftanlage Affeln,2010,0.6,1,Tacke TW 600 (1×),Affeln,MK,Windinvestor,,51.27842°,7.89391° O
Windkraftanlagen Ahlen-Gemmerich,2010,8.4,2,Enercon E-138 EP3 E2 (2×),Gemmerich,WAF,Vento Ludens GmbH ＆Co.KG,in Bau,51.73053°,7.884509° O


In [393]:
print(nw['Land- kreis'].unique())

dict_nw={'HAMK':'HA',
        'OESI':'OE',
        'ACDN':'AC',
        'HSDN':'HS',
         'HSKPB':'HS',
        'REWES':'RE',
        'BMNE':'BM',
        'WESRE':'WES',
        'BORST':'BOR',
        'MIHF':'MI',
        'BIGT':'BI',
        'OWL':'BI'}

   




nw['Land- kreis'].replace(dict_nw, inplace=True)
print(nw['Land- kreis'].unique())

['AC' 'MK' 'WAF' 'LIP' 'UN' 'OE' 'KLE' 'HAM' 'GT' 'COE' 'RE' 'DN' 'EN'
 'GM' 'EU' 'HF' 'SO' 'MI' 'BOR' 'BOT' 'VIE' 'HX' 'PB' 'WES' 'BM' 'ST' 'MS'
 'HSK' 'DO' 'HS' 'SI' 'HA' 'GE' 'ME' 'BI' 'NE' 'RS' 'MH' 'OWL' 'GS' 'GL'
 'W' 'KR' 'MG' 'SU']
['AC' 'MK' 'WAF' 'LIP' 'UN' 'OE' 'KLE' 'HAM' 'GT' 'COE' 'RE' 'DN' 'EN'
 'GM' 'EU' 'HF' 'SO' 'MI' 'BOR' 'BOT' 'VIE' 'HX' 'PB' 'WES' 'BM' 'ST' 'MS'
 'HSK' 'DO' 'HS' 'SI' 'HA' 'GE' 'ME' 'BI' 'NE' 'RS' 'MH' 'GS' 'GL' 'W'
 'KR' 'MG' 'SU']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nw['Land- kreis'].replace(dict_nw, inplace=True)


### Rheinland-Pfalz:

In [177]:
url_rp="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Rheinland-Pfalz"
# use requests.get() method with the provided static_url
response_rp=requests.get(url_rp).text
# assign the response to a object
soup_rp=BeautifulSoup(response_rp, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_rp.find_all('title'):
    print(title.get_text())
    
html_tables=soup_rp.find_all('table')
table_rp = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_rp.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen in Rheinland-Pfalz – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [178]:
rows_rp=[]    
for row in table_rp.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_rp.append(row)
        
        
print(len(rows_rp))


311


In [179]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:



for i in rows_rp[1:len(rows_rp)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [180]:
df_rp= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })

#splitting the coordinates:
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_rp['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_rp['Latitude']=coord['Latitude']
df_rp['Longitude']=coord['Longitude']

#remove not existing windparks
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlage Bleckhausen'].index, inplace=True)
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlage Bleialf'].index, inplace=True)
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlage Irrhausen'].index, inplace=True)
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlage Kell am See'].index, inplace=True)
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlage Pintesfeld'].index, inplace=True)
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlage Reifferscheid'].index, inplace=True)
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlage Reinsfeld'].index, inplace=True)
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlage Wiebelsheim'].index, inplace=True)
df_rp.drop(df_rp.loc[df_rp['Name']=='Windkraftanlagen Felsbergerhof'].index, inplace=True)



df_rp.set_index('Name', inplace=True)
df_rp['Land']='RP'


df_rp['Anzahl'] = [int(str(i)) for i in df_rp['Anzahl']]
df_rp['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_rp['Gesamt- leistung (MW)']]


df_rp.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Badenheim,2010,3.05,1,Enercon E-101 (1×),Badenheim,MZ,"49° 49′ 58″ N, 7° 59′ 15″ O",GAIA,,49° 49′ 58″ N,7° 59′ 15″ O,RP
Windkraftanlage Berghausen,2010,1.3,1,AN Bonus 1300/62 (1×),Berghausen,EMS,"50° 15′ 24″ N, 8° 1′ 0″ O",VS Windkraft,eine Seewind 25/132 wurde 2013 zugunsten eines...,50° 15′ 24″ N,8° 1′ 0″ O,RP
Windkraftanlage Burg,2010,1.5,1,Südwind S70 (1×),Burg,BIT,"49° 57′ 8″ N, 6° 21′ 0″ O",,,49° 57′ 8″ N,6° 21′ 0″ O,RP


In [181]:
df_rp.info()

<class 'pandas.core.frame.DataFrame'>
Index: 301 entries, Windkraftanlage Badenheim to Windpark Zilshausen
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   301 non-null    object 
 1   Gesamt- leistung (MW)     301 non-null    float64
 2   Anzahl                    301 non-null    int64  
 3   Typ (WKA)                 301 non-null    object 
 4   Ort                       301 non-null    object 
 5   Land- kreis               301 non-null    object 
 6   Koordinaten               301 non-null    object 
 7   Projektierer / Betreiber  301 non-null    object 
 8   Bemerkungen               301 non-null    object 
 9   Latitude                  301 non-null    object 
 10  Longitude                 301 non-null    object 
 11  Land                      301 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 30.6+ KB


In [182]:
orts_rp_MW=df_rp.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_rp_MW.head(10)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
SIM,676.45
BIT,437.92
AZ,413.58
TR,243.2
WIL,241.87
KIB,213.0
DAU,187.65
BIR,172.0
WW,165.38
COC,164.35


In [183]:
orts_rp=df_rp.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_rp.head(10)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
SIM,268
BIT,252
AZ,137
DAU,112
TR,101
WW,100
COC,86
BIR,72
WIL,71
KIB,71


In [241]:
rp=df_rp[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
rp.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Badenheim,2010,3.05,1,Enercon E-101 (1×),Badenheim,MZ,GAIA,,49° 49′ 58″ N,7° 59′ 15″ O
Windkraftanlage Berghausen,2010,1.3,1,AN Bonus 1300/62 (1×),Berghausen,EMS,VS Windkraft,eine Seewind 25/132 wurde 2013 zugunsten eines...,50° 15′ 24″ N,8° 1′ 0″ O
Windkraftanlage Burg,2010,1.5,1,Südwind S70 (1×),Burg,BIT,,,49° 57′ 8″ N,6° 21′ 0″ O


In [395]:
print(rp['Land- kreis'].unique())

dict_rp={'DÜWRP':'DÜW',
        'AZMZ':'AZ',
        'MZMZ':'MZ',
        'BIRKUS':'BIR',
         'AZKIB':'AZ',
        'KHKIB':'KH',
        'KLPS':'KL',
        'AZDÜWKIB':'AZ',
        'KHSIMMZ':'KH',
        'COCSIM':'COC',
        'KUSKIB':'KUS',
        'RHKMZ':'RHK',
        'GERSÜW':'GER',
        'SIMKH':'SIM',
        'AWMYK':'AW',
        'TRBIT':'TR',
        'AZWO':'AZ',
        'KUSKL':'KUS',
        'KLKUS':'KL',
        'MAY':'MYK',
        'RHK':'SIM'}

rp['Land- kreis'].replace(dict_rp, inplace=True)
print(rp['Land- kreis'].unique())


['MZ' 'EMS' 'BIT' 'BIR' 'KL' 'WW' 'KIB' 'WIL' 'PS' 'SIM' 'DAU' 'AK' 'COC'
 'AZ' 'RP' 'KH' 'KUS' 'DÜW' 'MYK' 'GER' 'TR' 'MAY' 'SÜW' 'RHK' 'AW']
['MZ' 'EMS' 'BIT' 'BIR' 'KL' 'WW' 'KIB' 'WIL' 'PS' 'SIM' 'DAU' 'AK' 'COC'
 'AZ' 'RP' 'KH' 'KUS' 'DÜW' 'MYK' 'GER' 'TR' 'SÜW' 'AW']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rp['Land- kreis'].replace(dict_rp, inplace=True)


### Saarland:

In [184]:
url_sa="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_im_Saarland"
# use requests.get() method with the provided static_url
response_sa=requests.get(url_sa).text
# assign the response to a object
soup_sa=BeautifulSoup(response_sa, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_sa.find_all('title'):
    print(title.get_text())
    
html_tables=soup_sa.find_all('table')
table_sa = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_sa.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen im Saarland – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [185]:
rows_sa=[]    
for row in table_sa.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_sa.append(row)
        
        
print(len(rows_sa))


51


In [186]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:



for i in rows_sa[1:len(rows_sa)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [188]:
df_sa= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })

#splitting the coordinates:
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_sa['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_sa['Latitude']=coord['Latitude']
df_sa['Longitude']=coord['Longitude']



df_sa.set_index('Name', inplace=True)
df_sa['Land']='SA'
#df_nw['Anzahl']=df_nw['Anzahl'].fillna(0)
df_sa['Anzahl'] = [int(str(i)) for i in df_sa['Anzahl']]
df_sa['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_sa['Gesamt- leistung (MW)']]


df_sa.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Erkershöhe,2010,3.0,1,Nordex N131/3000 (1×),Wemmetsweiler,NK,"49° 20′ 31″ N, 7° 4′ 30″ O",montanWIND GmbH,,49° 20′ 31″ N,7° 4′ 30″ O,SA
Windkraftanlage Riesweiler,2010,0.25,1,Nordex N29/250 (1×),Riesweiler,HOM,"49° 8′ 3″ N, 7° 18′ 22″ O",,,49° 8′ 3″ N,7° 18′ 22″ O,SA
Windkraftanlagen Gersweiler,2010,9.76,2,Enercon E-138 EP3 E2 (1×)Enercon E-160 EP5 E3 ...,Gersweiler,SB,"49° 12′ 35″ N, 6° 53′ 32″ O",DunoAir,,49° 12′ 35″ N,6° 53′ 32″ O,SA


In [189]:
df_sa.info()

<class 'pandas.core.frame.DataFrame'>
Index: 50 entries, Windkraftanlage Erkershöhe to Windpark Wolfersweiler
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   50 non-null     object 
 1   Gesamt- leistung (MW)     50 non-null     float64
 2   Anzahl                    50 non-null     int64  
 3   Typ (WKA)                 50 non-null     object 
 4   Ort                       50 non-null     object 
 5   Land- kreis               50 non-null     object 
 6   Koordinaten               50 non-null     object 
 7   Projektierer / Betreiber  50 non-null     object 
 8   Bemerkungen               50 non-null     object 
 9   Latitude                  50 non-null     object 
 10  Longitude                 50 non-null     object 
 11  Land                      50 non-null     object 
dtypes: float64(1), int64(1), object(10)
memory usage: 5.1+ KB


In [190]:
orts_sa_MW=df_sa.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_sa_MW.head(10)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
MZG,177.7
WND,165.35
SLS,63.5
NKWND,51.35
NK,35.5
WNDMZG,24.05
NKHOM,16.5
HOM,16.0
SB,15.76


In [191]:
orts_sa=df_sa.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_sa.head(10)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
WND,71
MZG,68
SLS,21
NKWND,19
NK,16
WNDMZG,10
HOM,8
NKHOM,5
SB,4


In [239]:
sa=df_sa[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
sa.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Erkershöhe,2010,3.0,1,Nordex N131/3000 (1×),Wemmetsweiler,NK,montanWIND GmbH,,49° 20′ 31″ N,7° 4′ 30″ O
Windkraftanlage Riesweiler,2010,0.25,1,Nordex N29/250 (1×),Riesweiler,HOM,,,49° 8′ 3″ N,7° 18′ 22″ O
Windkraftanlagen Gersweiler,2010,9.76,2,Enercon E-138 EP3 E2 (1×)Enercon E-160 EP5 E3 ...,Gersweiler,SB,DunoAir,,49° 12′ 35″ N,6° 53′ 32″ O


In [362]:
print(sa['Land- kreis'].unique())

dict_sa={
        'NKWND':'NK',
        'NKHOM':'NK',
        'WNDMZG':'WND'}

sa['Land- kreis'].replace(dict_sa, inplace=True)
print(sa['Land- kreis'].unique())


['NK' 'HOM' 'SB' 'SLS' 'MZG' 'WND' 'NKWND' 'NKHOM' 'WNDMZG']
['NK' 'HOM' 'SB' 'SLS' 'MZG' 'WND']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sa['Land- kreis'].replace(dict_sa, inplace=True)


### Sachsen:

In [192]:
url_s="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Sachsen"
# use requests.get() method with the provided static_url
response_s=requests.get(url_s).text
# assign the response to a object
soup_s=BeautifulSoup(response_s, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_s.find_all('title'):
    print(title.get_text())
    
html_tables=soup_s.find_all('table')
table_s = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_s.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)


rows_s=[]    
for row in table_s.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_s.append(row)
        
        
print(len(rows_s))



The title is:
Liste von Windkraftanlagen in Sachsen – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']
174


In [193]:
table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:



for i in rows_s[1:len(rows_s)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

In [196]:
df_s= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })





#splitting the coordinates:
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_s['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_s['Latitude']=coord['Latitude']
df_s['Longitude']=coord['Longitude']


#remove not existing wiondparks
df_s.drop(df_s.loc[df_s['Name']=='Windkraftanlage Ruppersdorf'].index, inplace=True)
df_s.drop(df_s.loc[df_s['Name']=='Windkraftanlage Seifersdorf'].index, inplace=True)
df_s.drop(df_s.loc[df_s['Name']=='Windkraftanlagen Zaulsdorf'].index, inplace=True)
df_s.drop(df_s.loc[df_s['Name']=='Windpark Bernsbach'].index, inplace=True)
df_s.drop(df_s.loc[df_s['Name']=='Windkraftpark Nossen (Windpark Zellsteig)'].index, inplace=True)



df_s.set_index('Name', inplace=True)
df_s['Land']='SA'
#df_nw['Anzahl']=df_nw['Anzahl'].fillna(0)
df_s['Anzahl'] = [int(str(i)) for i in df_s['Anzahl']]
df_s['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_s['Gesamt- leistung (MW)']]


df_s.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Altenhain,2010,0.25,1,Nordex N29/250 (1×),Altenhain,L,"51° 17′ 4″ N, 12° 42′ 16″ O",,eine Nordex N27/150 wurde 2015 abgebaut,51° 17′ 4″ N,12° 42′ 16″ O,SA
Windkraftanlage Börnichen,2010,1.8,1,Enercon E-66/18.70 (1×),Börnichen,ERZ,"50° 45′ 26″ N, 13° 7′ 40″ O",,,50° 45′ 26″ N,13° 7′ 40″ O,SA
Windkraftanlage Ebersdorf,2010,0.6,1,Enercon E-40/6.44 (1×),Ebersdorf,GR,"51° 4′ 8″ N, 14° 40′ 39″ O",,,51° 4′ 8″ N,14° 40′ 39″ O,SA


In [197]:
df_s.info()

<class 'pandas.core.frame.DataFrame'>
Index: 168 entries, Windkraftanlage Altenhain to Windpark Zwickau-Lippoldsruh
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   168 non-null    object 
 1   Gesamt- leistung (MW)     168 non-null    float64
 2   Anzahl                    168 non-null    int64  
 3   Typ (WKA)                 168 non-null    object 
 4   Ort                       168 non-null    object 
 5   Land- kreis               168 non-null    object 
 6   Koordinaten               168 non-null    object 
 7   Projektierer / Betreiber  168 non-null    object 
 8   Bemerkungen               168 non-null    object 
 9   Latitude                  168 non-null    object 
 10  Longitude                 168 non-null    object 
 11  Land                      168 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 17.1+ KB


In [198]:
orts_s_MW=df_s.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_s_MW.head(10)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
FG,243.01
GR,213.1
MEI,186.97
Z,138.785
BZ,138.51
L,129.46
TDO,107.66
FGERZ,59.0
PIR,46.77
ERZ,44.36


In [200]:
orts_s=df_s.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_s.head(10)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
FG,171
GR,123
L,90
TDO,90
BZ,86
MEI,81
Z,53
PIR,51
ERZ,40
FGERZ,28


In [238]:
s=df_s[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
s.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Altenhain,2010,0.25,1,Nordex N29/250 (1×),Altenhain,L,,eine Nordex N27/150 wurde 2015 abgebaut,51° 17′ 4″ N,12° 42′ 16″ O
Windkraftanlage Börnichen,2010,1.8,1,Enercon E-66/18.70 (1×),Börnichen,ERZ,,,50° 45′ 26″ N,13° 7′ 40″ O
Windkraftanlage Ebersdorf,2010,0.6,1,Enercon E-40/6.44 (1×),Ebersdorf,GR,,,51° 4′ 8″ N,14° 40′ 39″ O


In [364]:
print(s['Land- kreis'].unique())

dict_s={
        'TDOL':'TDO',
        'FGERZ':'FG',
        'PIRBZ':'PIR',
          'FGZ':'FG'}

s['Land- kreis'].replace(dict_s, inplace=True)
print(s['Land- kreis'].unique())


['L' 'ERZ' 'GR' 'FG' 'TDO' 'BZ' 'PIR' 'Z' 'V' 'C' 'MEI' 'TDOL' 'FGERZ'
 'FGZ' 'PIRBZ']
['L' 'ERZ' 'GR' 'FG' 'TDO' 'BZ' 'PIR' 'Z' 'V' 'C' 'MEI']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  s['Land- kreis'].replace(dict_s, inplace=True)


### Sachsen-Anhalt:

In [201]:
url_an="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Sachsen-Anhalt"
# use requests.get() method with the provided static_url
response_an=requests.get(url_an).text
# assign the response to a object
soup_an=BeautifulSoup(response_an, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_an.find_all('title'):
    print(title.get_text())
    
html_tables=soup_an.find_all('table')
table_an = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_an.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen in Sachsen-Anhalt – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [203]:
rows_an=[]    
for row in table_an.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_an.append(row)
        
        
print(len(rows_an))




table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:



for i in rows_an[1:len(rows_an)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

237


In [208]:
df_an= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })


#splitting the coordinates:
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_an['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_an['Latitude']=coord['Latitude']
df_an['Longitude']=coord['Longitude']


#remove not existing wiondparks
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlage Großgräfendorf'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlage Harsleben'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlage Klein Quenstedt'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlage Rohrsheim'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlage Roßberg'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlage Stegelitz'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlage Wust'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlagen Altenroda'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlagen Ditfurt'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windkraftanlagen Schwenda'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windpark Baumgarten'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windpark Galgenberg'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windpark Gleina'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windpark Obermöllern'].index, inplace=True)
df_an.drop(df_an.loc[df_an['Name']=='Windpark Klein Schwechten'].index, inplace=True)




df_an.set_index('Name', inplace=True)
df_an['Land']='SA'

df_an['Anzahl'] = [int(str(i)) for i in df_an['Anzahl']]
df_an['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_an['Gesamt- leistung (MW)']]


df_an.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Altenhain,2010,0.25,1,Nordex N29/250 (1×),Altenhain,L,"51° 17′ 4″ N, 12° 42′ 16″ O",,eine Nordex N27/150 wurde 2015 abgebaut,51° 17′ 4″ N,12° 42′ 16″ O,SA
Windkraftanlage Börnichen,2010,1.8,1,Enercon E-66/18.70 (1×),Börnichen,ERZ,"50° 45′ 26″ N, 13° 7′ 40″ O",,,50° 45′ 26″ N,13° 7′ 40″ O,SA
Windkraftanlage Ebersdorf,2010,0.6,1,Enercon E-40/6.44 (1×),Ebersdorf,GR,"51° 4′ 8″ N, 14° 40′ 39″ O",,,51° 4′ 8″ N,14° 40′ 39″ O,SA


In [209]:
df_an.info()

<class 'pandas.core.frame.DataFrame'>
Index: 221 entries, Windkraftanlage Ackendorf to Windpark Zschornewitz
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   221 non-null    object 
 1   Gesamt- leistung (MW)     221 non-null    float64
 2   Anzahl                    221 non-null    int64  
 3   Typ (WKA)                 221 non-null    object 
 4   Ort                       221 non-null    object 
 5   Land- kreis               221 non-null    object 
 6   Koordinaten               221 non-null    object 
 7   Projektierer / Betreiber  221 non-null    object 
 8   Bemerkungen               221 non-null    object 
 9   Latitude                  221 non-null    object 
 10  Longitude                 221 non-null    object 
 11  Land                      221 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 22.4+ KB


In [210]:
orts_an=df_an.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_an.head(10)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
BK,415
SLK,384
SDL,308
BLK,253
ABI,242
SAW,213
MSH,178
SK,171
WB,150
JL,138


In [212]:
orts_an_MW=df_an.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_an_MW.head(10)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
SDL,857.32
BK,725.6
SLK,723.25
BLK,568.22
ABI,505.38
SAW,401.25
WB,364.46
SK,326.55
MSH,287.57
JL,286.25


In [237]:
an=df_an[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
an.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Ackendorf,2010,1.5,1,GE Wind Energy 1.5sl (1×),Haldensleben,BK,Germania Windpark,,52° 13′ 29″ N,11° 25′ 34″ O
Windkraftanlage Breitenstein,2010,0.5,1,Enercon E-40/5.40 (1×),Breitenstein,MSH,,,51° 37′ 26″ N,10° 56′ 29″ O
Windkraftanlage Bretsch,2010,0.5,1,Enercon E-40/5.40 (1×),Bretsch,SDL,,,52° 50′ 13″ N,11° 38′ 20″ O


In [366]:
print(an['Land- kreis'].unique())

dict_an={
        'SKMSH':'SK',
        'SKSLK':'SK',
        'MSHSK':'MSH',
        'JLSDL':'JL',
        'SAWSDL':'SAW',
         'MSHSLK':'MSH',
          'BKHZ':'BK'}

an['Land- kreis'].replace(dict_an, inplace=True)
print(an['Land- kreis'].unique())

['BK' 'MSH' 'SDL' 'WB' 'HZ' 'SK' 'JL' 'BLK' 'ABI' 'MD' 'DE' 'SAW' 'SLK'
 'SKMSH' 'SKSLK' 'MSHSK' 'JLSDL' 'SAWSDL' 'MSHSLK' 'BKHZ']
['BK' 'MSH' 'SDL' 'WB' 'HZ' 'SK' 'JL' 'BLK' 'ABI' 'MD' 'DE' 'SAW' 'SLK']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  an['Land- kreis'].replace(dict_an, inplace=True)


### Schleswig-Holstein:


In [213]:
url_ho="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Schleswig-Holstein"
# use requests.get() method with the provided static_url
response_ho=requests.get(url_ho).text
# assign the response to a object
soup_ho=BeautifulSoup(response_ho, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_ho.find_all('title'):
    print(title.get_text())
    
html_tables=soup_ho.find_all('table')
table_ho = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_ho.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)

The title is:
Liste von Windkraftanlagen in Schleswig-Holstein – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']


In [216]:
rows_ho=[]    
for row in table_ho.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_ho.append(row)
        
        
print(len(rows_ho))




table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:



for i in rows_ho[1:len(rows_ho)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

493


In [220]:
df_ho= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })


#splitting the coordinates:
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_ho['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_ho['Latitude']=coord['Latitude']
df_ho['Longitude']=coord['Longitude']


#remove not existing wiondparks
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Am Kählerhof'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Apeldör'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Arkebek'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Ascheberg'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Aventoft'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Bargenstedt'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Barlteraltendeich'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Beidenfleth'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Blieschendorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Boksee'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Bögelhuusfeld'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Dellweg'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Eesch'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Eckernstrom'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Evensbüller Chaussee'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Fedderingen'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Flintbek'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Fäkalstraße'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Garbek'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Garding'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Grüner Deich'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Tating-Geestgraben'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Geesthacht-Pumpwerk['].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Glücksburg-Bremsbergallee'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Gollendorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Goosefeld'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Gremmerup'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Haffdeich'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Harmswöhrden'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Heide'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Helgoland'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Hollingstedt'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Klausdorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Kläranlage Meldorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Kleihörn'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Köhnholz-Ost'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Kömdiek'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Königsacker'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Krogaspe'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Lehe'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Malente'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Neuberend'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Neudorf-Bornstein'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Neufresenburg'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Neuhof'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Neustadt-Rettin'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Niebüll'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Nienborstel I'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Nordhastedt'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Norderheistedt'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Oersberg'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Oeverwisch'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Osterhof'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Oster-Ohrstedtfeld'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Passade'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Porrenkoog-Sielzug'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Poppenbüll'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Possfeld'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Püttsee'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Rader Insel'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Reesdorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Sankt Michaelisdonn-Klärwerk'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Sankt Michaelisdonn-Moorstrich'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Sankt Peter-Ording'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Scharfenstein'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Schnarup'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Schöpfwerk Adamsiel'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Schülper Feld'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Stedesand'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Sönke-Nissen-Koog'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Süderhastedt'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Süderholm'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Süderschmedeby'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Trendermarschweg'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Ulsnis'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Unewatt'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Vadersdorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Viöl'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Wedel-Yachthafen'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Westermarkelsdorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Westeroffenbülldeich'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlagen Wewelsfleth'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Wolmersdorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windkraftanlage Ünnern Diek'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Avendorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Brodten'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Elpersbüttel'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Glüsing'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Hennstedt'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Immenstedt'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Petersdorf'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Tammensiel'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Vollerwiek'].index, inplace=True)
df_ho.drop(df_ho.loc[df_ho['Name']=='Windpark Wyk auf Föhr'].index, inplace=True)


df_ho.set_index('Name', inplace=True)
df_ho['Land']='Sch-H'

df_ho['Anzahl'] = [int(str(i)) for i in df_ho['Anzahl']]
df_ho['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_ho['Gesamt- leistung (MW)']]


df_ho.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Ahrenshöft,2010,1.5,1,Vensys 77/1500 (1×),Ahrenshöft,NF,"54° 33′ 39″ N, 9° 5′ 16″ O",Ahrenshöfter Naturkraftwerke,Von 1998 bis 2003 stand im Windpark eine weite...,54° 33′ 39″ N,9° 5′ 16″ O,Sch-H
Windkraftanlage Albersdorf Süd,2010,5.56,1,Enercon E-160 EP5 E3 (1×),Albersdorf,HEI,"54° 7′ 18″ N, 9° 15′ 39″ O",WP Schlaa,errichtet an der A 23,54° 7′ 18″ N,9° 15′ 39″ O,Sch-H
Windkraftanlagen Arup,2010,1.0,2,Nordtank NTK 500/41 (2×),Struxdorf,SL,"54° 37′ 20″ N, 9° 38′ 3″ O",Westangler Windenergie,,54° 37′ 20″ N,9° 38′ 3″ O,Sch-H


In [221]:
df_ho.info()

<class 'pandas.core.frame.DataFrame'>
Index: 399 entries, Windkraftanlage Ahrenshöft to Windtestfeld Nord
Data columns (total 12 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Baujahr                   399 non-null    object 
 1   Gesamt- leistung (MW)     399 non-null    float64
 2   Anzahl                    399 non-null    int64  
 3   Typ (WKA)                 399 non-null    object 
 4   Ort                       399 non-null    object 
 5   Land- kreis               399 non-null    object 
 6   Koordinaten               399 non-null    object 
 7   Projektierer / Betreiber  399 non-null    object 
 8   Bemerkungen               399 non-null    object 
 9   Latitude                  399 non-null    object 
 10  Longitude                 399 non-null    object 
 11  Land                      399 non-null    object 
dtypes: float64(1), int64(1), object(10)
memory usage: 40.5+ KB


In [222]:
orts_ho=df_ho.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_ho.head(10)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
HEI,854
NF,854
SL,521
OH,332
IZ,306
RD,224
SE,76
RZ,63
OD,43
PLÖ,41


In [223]:
orts_ho_MW=df_ho.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_ho_MW.head(10)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
NF,2480.54
HEI,2312.723
SL,1448.22
IZ,822.9
OH,759.975
RD,653.405
SE,308.98
RZ,104.55
OHSE,91.5
SG,67.5


In [236]:
ho=df_ho[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
ho.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Ahrenshöft,2010,1.5,1,Vensys 77/1500 (1×),Ahrenshöft,NF,Ahrenshöfter Naturkraftwerke,Von 1998 bis 2003 stand im Windpark eine weite...,54° 33′ 39″ N,9° 5′ 16″ O
Windkraftanlage Albersdorf Süd,2010,5.56,1,Enercon E-160 EP5 E3 (1×),Albersdorf,HEI,WP Schlaa,errichtet an der A 23,54° 7′ 18″ N,9° 15′ 39″ O
Windkraftanlagen Arup,2010,1.0,2,Nordtank NTK 500/41 (2×),Struxdorf,SL,Westangler Windenergie,,54° 37′ 20″ N,9° 38′ 3″ O


In [368]:
print(ho['Land- kreis'].unique())

dict_ho={
        'OHSE':'OH'}

ho['Land- kreis'].replace(dict_ho, inplace=True)
print(ho['Land- kreis'].unique())

['NF' 'HEI' 'SL' 'OD' 'RD' 'IZ' 'FL' 'OH' 'RZ' 'SE' 'PI' 'HL' 'PLÖ' 'KI'
 'SG' 'OHSE']
['NF' 'HEI' 'SL' 'OD' 'RD' 'IZ' 'FL' 'OH' 'RZ' 'SE' 'PI' 'HL' 'PLÖ' 'KI'
 'SG']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ho['Land- kreis'].replace(dict_ho, inplace=True)


### Thuringen:

In [224]:
url_th="https://de.wikipedia.org/wiki/Liste_von_Windkraftanlagen_in_Thüringen"
# use requests.get() method with the provided static_url
response_th=requests.get(url_th).text
# assign the response to a object
soup_th=BeautifulSoup(response_th, 'html.parser')
# Use soup.title attribute
print('The title is:')
for title in soup_th.find_all('title'):
    print(title.get_text())
    
html_tables=soup_th.find_all('table')
table_th = html_tables[0]

column_names = []

# Apply find_all() function with `th` element on first_launch_table
# Iterate each th element and apply the provided extract_column_from_header() to get a column name
# Append the Non-empty column name (`if name is not None and len(name) > 0`) into a list called column_names

columns=table_th.find_all('th')
for column in columns:
    name=extract_column_from_header(column)
    
    if name!= None:
        if len(name)>0:
            column_names.append(name)
    
print(column_names)


rows_th=[]    
for row in table_th.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_th.append(row)
        
        
print(len(rows_th))



The title is:
Liste von Windkraftanlagen in Thüringen – Wikipedia
['Name', 'Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Koordinaten', 'Projektierer / Betreiber', 'Bemerkungen']
102


In [225]:
rows_th=[]    
for row in table_th.find_all('tr'):
        td = row.find_all('td')
        row = [row.text for row in td]
        rows_th.append(row)
        
        
print(len(rows_th))




table_dict= dict.fromkeys(column_names)



# Let's initial the table_dict with each value to be an empty list
table_dict['Name'] = []
table_dict['Baujahr'] = []
table_dict['Gesamt- leistung (MW)'] = []
table_dict['Anzahl'] = []
table_dict['Typ (WKA)'] = []
table_dict['Ort'] = []
table_dict['Land- kreis'] = []

table_dict['Koordinaten']=[]
table_dict['Projektierer / Betreiber']=[]
table_dict['Bemerkungen']=[]


#filling the dictionaries:



for i in rows_th[1:len(rows_th)]:
    if i[0]!=None:
        name=i[0].strip()
        name=re.sub(r'\[\d+\]', '', name)
        table_dict['Name'].append(name)
        
    years=x[1].strip()
    table_dict['Baujahr'].append(years)
    table_dict['Baujahr'] = [x[0:4] for x in table_dict['Baujahr']]
    
    table_dict['Gesamt- leistung (MW)'].append(i[2].strip())
    table_dict['Anzahl'].append(i[3].strip())
    table_dict['Typ (WKA)'].append(i[4].strip())
    table_dict['Ort'].append(i[5].strip())
    table_dict['Land- kreis'].append(i[6].strip())

    table_dict['Koordinaten'].append(i[7].strip())
    table_dict['Projektierer / Betreiber'].append(i[8].strip())
    table_dict['Bemerkungen'].append(i[-1].strip())                                             
  
    

102


In [227]:
df_th= pd.DataFrame({ key:pd.Series(value) for key, value in table_dict.items() })

#splitting the coordinates:
fn = lambda x: pd.Series([i for i in (x.split(','))])
coord = df_th['Koordinaten'].apply(fn)
coord.rename(columns={0:'Latitude',1:'Longitude'},inplace=True)
df_th['Latitude']=coord['Latitude']
df_th['Longitude']=coord['Longitude']


df_th.set_index('Name', inplace=True)
df_th['Land']='Th'

df_th['Anzahl'] = [int(str(i)) for i in df_th['Anzahl']]
df_th['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in df_th['Gesamt- leistung (MW)']]


df_th.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Koordinaten,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Land
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Windkraftanlage Breitungen/Werra,2010,0.6,1,Tacke TW 600e (1×),Breitungen-Winne,SM,"50° 45′ 22″ N, 10° 21′ 19″ O",,Sanierung 2010,50° 45′ 22″ N,10° 21′ 19″ O,Th
Windkraftanlage Cretzschwitz,2010,2.35,1,Enercon E-92 (1×),Cretzschwitz,G,"50° 56′ 12″ N, 12° 6′ 24″ O",GoEn Planungsgesellschaft,,50° 56′ 12″ N,12° 6′ 24″ O,Th
Windkraftanlage Diedorf,2010,0.6,1,Micon M 1500-600 (1×),Diedorf,UH,"51° 11′ 22″ N, 10° 17′ 32″ O",,,51° 11′ 22″ N,10° 17′ 32″ O,Th


In [228]:
orts_th=df_th.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
orts_th.head(10)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
SÖM,103
UH,85
WAK,83
KYF,83
ABG,64
GTHUH,64
SHK,58
EIC,57
AP,50
EF,36


In [229]:
orts_th_MW=df_th.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
orts_th_MW.head(10)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
SÖM,270.95
UH,208.85
KYF,180.65
GTHUH,158.85
WAK,133.87
SHK,117.7
ABG,113.3
EIC,100.5
AP,93.5
NDH,83.75


In [235]:
th=df_th[['Baujahr', 'Gesamt- leistung (MW)', 'Anzahl', 'Typ (WKA)', 'Ort', 'Land- kreis', 'Projektierer / Betreiber', 'Bemerkungen', 'Latitude', 'Longitude']]
th.head(3)

Unnamed: 0_level_0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Windkraftanlage Breitungen/Werra,2010,0.6,1,Tacke TW 600e (1×),Breitungen-Winne,SM,,Sanierung 2010,50° 45′ 22″ N,10° 21′ 19″ O
Windkraftanlage Cretzschwitz,2010,2.35,1,Enercon E-92 (1×),Cretzschwitz,G,GoEn Planungsgesellschaft,,50° 56′ 12″ N,12° 6′ 24″ O
Windkraftanlage Diedorf,2010,0.6,1,Micon M 1500-600 (1×),Diedorf,UH,,,51° 11′ 22″ N,10° 17′ 32″ O


In [383]:
print(th['Land- kreis'].unique())

dict_th={'SHKAP':'SHK',
        'KYFSÖM':'KYF',
        'KYFUH':'KYF',
        'APSLF':'AP',
        'WAKGTH':'WAK',
        'GTHUH':'GTH',
        'SÖ':'SÖM'}

th['Land- kreis'].replace(dict_th, inplace=True)
print(th['Land- kreis'].unique())

['SM' 'G' 'UH' 'WAK' 'EIC' 'SOK' 'SHK' 'GRZ' 'SÖ' 'ABG' 'SLF' 'HBN' 'SÖM'
 'AP' 'GTH' 'KYF' 'EF' 'NDH' 'IK']
['SM' 'G' 'UH' 'WAK' 'EIC' 'SOK' 'SHK' 'GRZ' 'SÖM' 'ABG' 'SLF' 'HBN' 'AP'
 'GTH' 'KYF' 'EF' 'NDH' 'IK']


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  th['Land- kreis'].replace(dict_th, inplace=True)


In [403]:
landes_list=[bw, th, ho, an, s , sa, rp, nw, bayern, mvp, hbh, he, br]
landes=pd.concat(landes_list)
landes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4254 entries, Forschungswindkraftanlage RedoxWind to Windpark Zossen
Data columns (total 10 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Baujahr                   4254 non-null   object
 1   Gesamt- leistung (MW)     4254 non-null   object
 2   Anzahl                    4254 non-null   object
 3   Typ (WKA)                 4254 non-null   object
 4   Ort                       4254 non-null   object
 5   Land- kreis               4254 non-null   object
 6   Projektierer / Betreiber  4254 non-null   object
 7   Bemerkungen               4254 non-null   object
 8   Latitude                  4254 non-null   object
 9   Longitude                 4254 non-null   object
dtypes: object(10)
memory usage: 365.6+ KB


In [404]:
landes.head(3)

Unnamed: 0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude
Forschungswindkraftanlage RedoxWind,2017,2.0,1,Qreon Q82 (1×),Karlsruhe,KA,Fraunhofer ICT,Forschungsprojekt mit kombiniertem Batteriespe...,49° 1′ 9″ N,8° 31′ 4″ O
Windkraftanlage Alexanderschanze,2003,1.5,1,Südwind S70 (1×),Bad Peterstal-Griesbach,OG,"Schmalz, Windenergieanlage „Bei der Schanz“",errichtet an der Schwarzwaldhochstraße im Bere...,48° 28′ 22″ N,8° 16′ 7″ O
Windkraftanlage Alpirsbach,1999,2.3,1,Enercon E-70 E4 (1×),Römlinsdorf,FDS,"Windkraft Römlinsdorf, Stadtwerke Stuttgart",Repowering 2013 (1× Enercon E-70 E4 statt 1× F...,48° 20′ 0″ N,8° 28′ 34″ O


In [405]:
data=pd.read_csv('Landkreis_ID.csv', index_col=False)
data.head(5)

Unnamed: 0,Code,NUTS 1.1,NUTS 2.1,Name,Land- kreis
0,DEB12,Rheinland-Pfalz,Koblenz,Ahrweiler,AW
1,DE275,Bayern,Schwaben,Aichach-Friedberg,AIC
2,DE145,Baden-Württemberg,Tübingen,Alb-Donau-Kreis,UL
3,DEG0M,Thüringen,Thüringen,Altenburger Land,ABG
4,DEB13,Rheinland-Pfalz,Koblenz,Altenkirchen (Westerwald),AK


In [406]:
data.to_csv('Windanlage_DE.csv')

In [419]:
new_data=pd.merge(landes,data, how='left', on='Land- kreis')

In [420]:
new_data['Code'].loc[new_data['Land- kreis']=='CB']='DE402'
new_data['Code'].loc[new_data['Land- kreis']=='HOM']='DEC05'
new_data['NUTS 1.1'].loc[new_data['Land- kreis']=='CB']='DE4'
new_data['NUTS 1.1'].loc[new_data['Land- kreis']=='HOM']='DEC'

new_data['NUTS 2.1'].loc[new_data['Land- kreis']=='CB']='DE40'
new_data['NUTS 2.1'].loc[new_data['Land- kreis']=='HOM']='DEC0'
new_data['Name'].loc[new_data['Land- kreis']=='CB']='Cottbus, Kreisfreie Stadt'
new_data['Name'].loc[new_data['Land- kreis']=='HOM']='Saarpfalz-Kreis'



In [421]:
new_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4693 entries, 0 to 4692
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Baujahr                   4693 non-null   object
 1   Gesamt- leistung (MW)     4693 non-null   object
 2   Anzahl                    4693 non-null   object
 3   Typ (WKA)                 4693 non-null   object
 4   Ort                       4693 non-null   object
 5   Land- kreis               4693 non-null   object
 6   Projektierer / Betreiber  4693 non-null   object
 7   Bemerkungen               4693 non-null   object
 8   Latitude                  4693 non-null   object
 9   Longitude                 4693 non-null   object
 10  Code                      4693 non-null   object
 11  NUTS 1.1                  4693 non-null   object
 12  NUTS 2.1                  4693 non-null   object
 13  Name                      4693 non-null   object
dtypes: object(14)
memory usa

In [422]:
new_data['Anzahl'] = [int(str(i)) for i in new_data['Anzahl']]
new_data['Gesamt- leistung (MW)'] = [float(str(i).replace(",", ".")) for i in new_data['Gesamt- leistung (MW)']]



new_data_numbers=new_data.groupby(['Land- kreis']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
new_data_numbers.head(10)

Unnamed: 0_level_0,Anzahl
Land- kreis,Unnamed: 1_level_1
HEI,854
NF,854
HRO,702
UM,667
PR,577
PB,544
SL,521
CUX,516
EL,499
AN,490


In [426]:
new_data_mw=new_data.groupby(['Land- kreis']).agg({'Gesamt- leistung (MW)':'sum'}).sort_values(by='Gesamt- leistung (MW)',ascending=False)
new_data_mw.head(10)

Unnamed: 0_level_0,Gesamt- leistung (MW)
Land- kreis,Unnamed: 1_level_1
NF,2480.54
HEI,2312.723
UM,1666.85
HRO,1591.12
SL,1448.22
PB,1358.505
EL,1253.45
PR,1240.7
OS,1132.78
AN,1035.16


In [431]:

new_data['Latitude'] = new_data['Latitude'].str.split('N', n=1).str.get(0)
new_data['Longitude'] = new_data['Longitude'].str.split('O', n=1).str.get(0)
new_data.head()

Unnamed: 0,Baujahr,Gesamt- leistung (MW),Anzahl,Typ (WKA),Ort,Land- kreis,Projektierer / Betreiber,Bemerkungen,Latitude,Longitude,Code,NUTS 1.1,NUTS 2.1,Name
0,2017,2.0,1,Qreon Q82 (1×),Karlsruhe,KA,Fraunhofer ICT,Forschungsprojekt mit kombiniertem Batteriespe...,49° 1′ 9″,8° 31′ 4″,DE123,Baden-Württemberg,Karlsruhe,"Karlsruhe, Landkreis"
1,2017,2.0,1,Qreon Q82 (1×),Karlsruhe,KA,Fraunhofer ICT,Forschungsprojekt mit kombiniertem Batteriespe...,49° 1′ 9″,8° 31′ 4″,DE122,Baden-Württemberg,Karlsruhe,"Karlsruhe, Stadtkreis"
2,2003,1.5,1,Südwind S70 (1×),Bad Peterstal-Griesbach,OG,"Schmalz, Windenergieanlage „Bei der Schanz“",errichtet an der Schwarzwaldhochstraße im Bere...,48° 28′ 22″,8° 16′ 7″,DE134,Baden-Württemberg,Freiburg,Ortenaukreis
3,1999,2.3,1,Enercon E-70 E4 (1×),Römlinsdorf,FDS,"Windkraft Römlinsdorf, Stadtwerke Stuttgart",Repowering 2013 (1× Enercon E-70 E4 statt 1× F...,48° 20′ 0″,8° 28′ 34″,DE12C,Baden-Württemberg,Karlsruhe,Freudenstadt
4,1990,0.095,1,Reymo DANmark 22 (1×),Hausen vor Wald,VS,Pionier-Windmühle Auenberg,(*); erste kommerziell betriebene Windkraftanl...,47° 53′ 19″,8° 28′ 24″,DE136,Baden-Württemberg,Freiburg,Schwarzwald-Baar-Kreis


In [433]:
new_data_coord=new_data.groupby(['Latitude','Longitude', 'Land- kreis', 'Name', 'Code']).agg({'Anzahl':'sum'}).sort_values(by='Anzahl',ascending=False)
new_data_coord.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Anzahl
Latitude,Longitude,Land- kreis,Name,Code,Unnamed: 5_level_1
53° 36′ 37″,7° 25′ 45″,WTM,Wittmund,DE94H,144
53° 58′ 26″,8° 56′ 0″,HEI,Dithmarschen,DEF05,135
51° 52′ 34″,13° 31′ 15″,LIF,Lichtenfels,DE24C,114
52° 0′ 3″,12° 49′ 31″,AN,"Ansbach, Kreisfreie Stadt",DE251,114
52° 0′ 3″,12° 49′ 31″,AN,"Ansbach, Landkreis",DE256,114
53° 21′ 24″,13° 56′ 0″,UM,Uckermark,DE40I,113
54° 36′ 24″,8° 54′ 8″,NF,Nordfriesland,DEF07,88
51.638056°,8.915°,PB,Paderborn,DEA47,85
51° 7′ 32″,11° 57′ 31″,BLK,Burgenlandkreis,DEE08,82
52° 32′ 14″,12° 52′ 11″,HVL,Havelland,DE408,82


In [434]:
new_data.to_csv('Landkreis_id-windanlage.csv')

In [436]:
new_data_coord.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4692 entries, ('53°\xa036′\xa037″\xa0', ' 7°\xa025′\xa045″\xa0', 'WTM', 'Wittmund', 'DE94H') to ('50.886667°\xa0', ' 8.298056°\xa0', 'SI', 'Siegen-Wittgenstein', 'DEA5A')
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Anzahl  4692 non-null   int64
dtypes: int64(1)
memory usage: 152.6+ KB
