In [1]:
##############
#President Code
##############

import requests
import pandas as pd
from bs4 import BeautifulSoup
import re


def generate_raw_table(html_table):
    """Extract one list of text fields per data row of a parsed HTML table.

    Each ``<tr>`` of ``html_table`` is scanned cell by cell (``<td>``), and
    only three cell positions contribute to the output row:

    * position 1 - the text of every ``<span>`` in the cell, with a trailing
      bracketed footnote marker (``[a]``, ``[10]``, ...) removed;
    * position 3 - the text of the first ``<a>`` in the cell;
    * position 6 - the text of the cell's ``<i>`` element when it has one,
      otherwise the text of its first ``<a>``.

    The caller feeds the result into a frame with the columns
    'Presidency Start', 'Presidency End', 'President Name' and 'Party', so a
    complete row is expected to hold four strings in that order.

    Args:
        html_table: A parsed table element exposing BeautifulSoup-style
            ``find_all``/``find`` methods.

    Returns:
        list[list[str]]: One entry per row that yielded at least one field;
        rows without matching cells (e.g. header rows) are skipped.

    Raises:
        IndexError: If cell 3, or cell 6 lacking an ``<i>``, has no ``<a>``.
    """
    # Matches a single bracketed marker at the very end of the text. The
    # previous fixed three-character slice only handled markers like "[a]"
    # and left a dangling "[" behind for longer ones such as "[10]".
    trailing_footnote = re.compile(r'\[[^\[\]]*\]$')

    table = []
    for row in html_table.find_all('tr'):
        r = []
        for pos, col in enumerate(row.find_all('td')):
            if pos == 1:
                for span in col.find_all('span'):
                    r.append(trailing_footnote.sub('', span.text))
            elif pos == 3:
                r.append(col.find_all('a')[0].text)
            elif pos == 6:
                italic = col.find('i')
                if italic is not None:
                    r.append(italic.text)
                else:
                    r.append(col.find_all('a')[0].text)
        if r:
            table.append(r)
    return table

def create_dataframe_from_raw_table(raw_table):
    """Build the presidency DataFrame from the scraped list of rows.

    Rows containing any missing value are dropped, after which both date
    columns are parsed into pandas datetimes. The original row index is kept
    (it is not reset after dropping rows).

    Args:
        raw_table: List of rows, each ordered as start date, end date,
            president name, party.

    Returns:
        pandas.DataFrame with columns 'Presidency Start', 'Presidency End',
        'President Name' and 'Party'.
    """
    frame = pd.DataFrame(
        raw_table,
        columns=['Presidency Start', 'Presidency End', 'President Name', 'Party'],
    ).dropna()

    # Parse the end date first, then the start date.
    for date_column in ('Presidency End', 'Presidency Start'):
        frame[date_column] = pd.to_datetime(frame[date_column])
    return frame

def generate_dates(start_date, days_count):
    """Return ``days_count`` consecutive daily timestamps starting at ``start_date``.

    Args:
        start_date: First date of the run (anything ``pd.date_range`` accepts).
        days_count: Number of daily entries to generate.

    Returns:
        list of pandas.Timestamp, one per day, beginning with ``start_date``.
    """
    return pd.date_range(start=start_date, periods=days_count).tolist()

def append_presidency_rows(df):
    """Expand every presidency into one row per day of the term.

    For each input row, a daily date is generated from 'Presidency Start' for
    as many days as lie between the start and end dates, so the end date
    itself is not included. Each generated row carries that day in
    'Presidency Start' and copies the term's 'Presidency End',
    'President Name' and 'Party'.

    Args:
        df: DataFrame with datetime columns 'Presidency Start' and
            'Presidency End' plus 'President Name' and 'Party'.

    Returns:
        A new pandas.DataFrame with the same four columns and one row per day.
    """
    daily_rows = []
    for _, term in df.iterrows():
        term_start = term['Presidency Start']
        term_length = (term['Presidency End'] - term_start).days
        daily_rows.extend(
            [day, term['Presidency End'], term['President Name'], term['Party']]
            for day in generate_dates(start_date=term_start, days_count=term_length)
        )

    return pd.DataFrame(
        daily_rows,
        columns=['Presidency Start', 'Presidency End', 'President Name', 'Party'],
    )

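# Steps performed by add_current_president():
#   1. Take the 'Presidency End' date from the last row of df.
#   2. Compute the number of days between that date and today.
#   3. Append one row per day to df, labelled with the current president's
#      name and party.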

def add_current_president(df, president_name='Donald Trump', party='Republican Party'):
    """Append one row per day for the sitting president, up to yesterday.

    The new rows start on the 'Presidency End' date of the last row in ``df``
    and continue daily for the number of whole days between that date and
    now. Every added row has today's date as its 'Presidency End'.

    Args:
        df: DataFrame with columns 'Presidency Start', 'Presidency End',
            'President Name' and 'Party'; must contain at least one row.
        president_name: Name written into the added rows. Defaults to the
            value that was previously hard-coded.
        party: Party written into the added rows. Defaults to the value that
            was previously hard-coded.

    Returns:
        A new pandas.DataFrame containing the rows of ``df`` followed by the
        added rows, with a fresh 0..n-1 index. ``df`` itself is not modified.

    Raises:
        IndexError: If ``df`` is empty.
    """
    columns = ['Presidency Start', 'Presidency End', 'President Name', 'Party']
    start_date = df['Presidency End'].iloc[-1]
    cur_date = pd.to_datetime("today")
    diff = (cur_date - start_date).days

    # Nothing to add when the last recorded term already reaches today or
    # beyond; a negative count would otherwise make date_range raise.
    if diff <= 0:
        return df.reset_index(drop=True)

    end_date = cur_date.strftime("%Y-%m-%d")
    lst = [
        [day, end_date, president_name, party]
        for day in pd.date_range(start_date, periods=diff)
    ]

    ldf = pd.DataFrame(lst, columns=columns)
    ldf['Presidency End'] = pd.to_datetime(ldf['Presidency End'])
    ldf['Presidency Start'] = pd.to_datetime(ldf['Presidency Start'])

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent and works on older versions too.
    return pd.concat([df, ldf], ignore_index=True)

def main():
    """Download the presidents list and return it as a one-row-per-day frame.

    Fetches the Wikipedia page, parses the first table with class
    ``wikitable``, converts it into a DataFrame of presidencies, expands each
    presidency into daily rows and finally appends daily rows for the current
    president.

    Returns:
        pandas.DataFrame with columns 'Presidency Start', 'Presidency End',
        'President Name' and 'Party'.

    Raises:
        requests.RequestException: If the page cannot be fetched (network
            failure, timeout or a non-success HTTP status).
        ValueError: If the page contains no table with class ``wikitable``.
    """
    url = 'https://en.wikipedia.org/wiki/List_of_Presidents_of_the_United_States'

    # Without a timeout a stalled connection would hang the notebook forever,
    # and without the status check an error page would be parsed as if it
    # were the article.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'lxml')
    html_table = soup.find('table', {'class': 'wikitable'})
    if html_table is None:
        raise ValueError("No table with class 'wikitable' found at " + url)

    raw_table = generate_raw_table(html_table)
    df = create_dataframe_from_raw_table(raw_table)
    df = append_presidency_rows(df)
    df = add_current_president(df)
    return df

# Run the scrape-and-expand pipeline defined above.
# (main() can also be called on its own to inspect the full result.)

# Keep only the last 10,000 daily rows; `df` is the frame that the following
# cells display and write to MySQL. (An earlier run used main().tail(9000).)
df = pd.DataFrame(main().tail(10000))
# df  -- uncomment to display the frame

In [37]:
# Quick sanity check: show the last three rows of df.
df.tail(3)

Unnamed: 0,Presidency Start,Presidency End,President Name,Party
79410,2018-11-08,2018-11-11,Donald Trump,Republican Party
79411,2018-11-09,2018-11-11,Donald Trump,Republican Party
79412,2018-11-10,2018-11-11,Donald Trump,Republican Party


In [2]:
# Import dataframe into MySQL
import os
from urllib.parse import quote_plus

import sqlalchemy
from sqlalchemy import create_engine

# Connection settings. Each value can be overridden with an environment
# variable so that real credentials never have to be saved in the notebook;
# the fallbacks are the original local-development values.
kwargs = dict(
    username=os.environ.get('MYSQL_USER', 'root'),
    password=os.environ.get('MYSQL_PASSWORD', 'password'),
    database_ip=os.environ.get('MYSQL_HOST', 'localhost'),
    database_name=os.environ.get('MYSQL_DATABASE', 'ecommercedb'),
)

# User name and password are URL-escaped because characters such as '@', ':'
# or '/' would otherwise be read as part of the URL structure.
# NOTE(review): a previous run of this cell ended in
# "NoSuchModuleError: Can't load plugin: sqlalchemy.dialects:mysql.pymysql".
# Presumably the PyMySQL driver or an up-to-date SQLAlchemy was missing from
# the kernel's environment - confirm both are installed before re-running.
conn_string = "mysql+pymysql://{username}:{password}@{database_ip}/{database_name}".format(
    **dict(
        kwargs,
        username=quote_plus(kwargs['username']),
        password=quote_plus(kwargs['password']),
    )
)
engine = create_engine(conn_string)

# Replace any existing `pres` table with the contents of df; the DataFrame
# index is not written as a column.
df.to_sql(con=engine, if_exists='replace', index=False, name='pres')


NoSuchModuleError: Can't load plugin: sqlalchemy.dialects:mysql.pymysql