In [13]:
import pandas as pd
import psycopg2
from sql import engine

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation / FutureWarning notices raised by
# the libraries used below, so API changes will not be visible in the output.
warnings.filterwarnings('ignore')

In [14]:
# load the keyword test data from the CSV export and preview the first rows
csv_path = 'data/My_Free_WordStream_Keywords.csv'
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,Keyword,Google Ads Search Volume,Google Ads CPC,Google Ads Competition,Bing Ads Search Volume,Bing Ads CPC,Bing Ads Competition
0,bildungsgutschein data science,10,$0.01,High,0.0,$0.05,Low
1,bootcamp data,10,$11.2,Low,10.0,$0.05,High
2,bootcamp data scientist,4400,$23.91,Low,0.0,$0.05,Low
3,bootcamp web developer,720,$20.86,Medium,0.0,$0.05,Low
4,coding bootcamp data science,20,$25.44,Medium,170.0,$0.05,Low


In [15]:
# normalise the column labels: lower-case, spaces replaced by underscores
df.columns = df.columns.str.lower().str.replace(' ', '_')

# summary of columns, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   keyword                   482 non-null    object 
 1   google_ads_search_volume  482 non-null    int64  
 2   google_ads_cpc            482 non-null    object 
 3   google_ads_competition    482 non-null    object 
 4   bing_ads_search_volume    480 non-null    float64
 5   bing_ads_cpc              480 non-null    object 
 6   bing_ads_competition      480 non-null    object 
dtypes: float64(1), int64(1), object(5)
memory usage: 26.5+ KB


In [16]:
# remove the literal $-sign from the CPC columns
# regex=False is passed explicitly: '$' is the regex end-of-string anchor and
# the default value of `regex` differs between pandas versions, so relying on
# the default makes the result version-dependent.
df['google_ads_cpc'] = df['google_ads_cpc'].str.replace('$', '', regex=False)
df['bing_ads_cpc'] = df['bing_ads_cpc'].str.replace('$', '', regex=False)

# rename columns so the currency is part of the column name
# (assignment instead of inplace=True keeps the step explicit)
df = df.rename(columns={'google_ads_cpc': 'google_ads_cpc_in_$', 'bing_ads_cpc': 'bing_ads_cpc_in_$'})

# change object to float (missing values stay NaN)
df = df.astype({'google_ads_cpc_in_$': 'float', 'bing_ads_cpc_in_$': 'float'})

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 482 entries, 0 to 481
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   keyword                   482 non-null    object 
 1   google_ads_search_volume  482 non-null    int64  
 2   google_ads_cpc_in_$       482 non-null    float64
 3   google_ads_competition    482 non-null    object 
 4   bing_ads_search_volume    480 non-null    float64
 5   bing_ads_cpc_in_$         480 non-null    float64
 6   bing_ads_competition      480 non-null    object 
dtypes: float64(3), int64(1), object(3)
memory usage: 26.5+ KB


In [17]:
# show the first rows of df to check the cleaned CPC columns
df.head()

Unnamed: 0,keyword,google_ads_search_volume,google_ads_cpc_in_$,google_ads_competition,bing_ads_search_volume,bing_ads_cpc_in_$,bing_ads_competition
0,bildungsgutschein data science,10,0.01,High,0.0,0.05,Low
1,bootcamp data,10,11.2,Low,10.0,0.05,High
2,bootcamp data scientist,4400,23.91,Low,0.0,0.05,Low
3,bootcamp web developer,720,20.86,Medium,0.0,0.05,Low
4,coding bootcamp data science,20,25.44,Medium,170.0,0.05,Low


In [18]:
# write data to sql
table_name = 'test_data'
if engine is not None:
    try:
        df.to_sql(
            name=table_name,           # Name of SQL table
            con=engine,                # Engine or connection
            if_exists='replace',       # Drop the table before inserting new values
            schema='capstone_group2',  # Target schema; presumably created beforehand - verify
            index=False,               # Do NOT write the DataFrame index as a column
            chunksize=5000,            # Number of rows in each batch written at a time
            method='multi',            # Pass multiple values in a single INSERT clause
        )
        print(f"The {table_name} table was imported successfully.")
    # Error handling: `Exception` already covers psycopg2.DatabaseError, so one
    # clause is enough. The error is printed rather than re-raised.
    except Exception as error:
        print(error)
        # drop the engine so a later `engine is not None` guard skips its write
        engine = None
else:
    # make the skipped write visible instead of silently doing nothing
    print(f"No database engine available; the {table_name} table was not written.")

The test_data table was imported successfully.
