In [1]:
# Import pandas, the library we need for our data preparation:
import pandas as pd

In [2]:
# Read the consumer survey data from the csv file into a dataframe
# (the path is relative to the directory the notebook is run from):
survey = pd.read_csv('data/consumer_survey.csv')

In [3]:
# Show the dataframe (a bare expression on the last line of a cell is rendered as its output):
survey

Unnamed: 0,question,item,total,age_14_29,age_30_49,age_50_x,female,male
0,Current frequency of buying organic foods,Exclusively,3%,3%,3%,3%,4%,3%
1,Current frequency of buying organic foods,Frequently,33%,30%,38%,31%,36%,29%
2,Current frequency of buying organic foods,Occasionally,49%,52%,43%,51%,47%,50%
3,Current frequency of buying organic foods,Never,15%,16%,16%,15%,12%,18%
4,Current frequency of buying organic foods,Top 2,36%,33%,41%,34%,40%,32%
5,Current frequency of buying organic foods,Top 3,85%,85%,84%,85%,87%,82%
6,Planned frequency of buying organic foods in t...,Exclusively,4%,3%,5%,4%,5%,3%
7,Planned frequency of buying organic foods in t...,Frequently,38%,40%,40%,35%,39%,35%
8,Planned frequency of buying organic foods in t...,Occasionally,47%,50%,41%,48%,46%,48%
9,Planned frequency of buying organic foods in t...,Never,12%,7%,14%,12%,10%,14%


In [4]:
# Show dataframe info (index range, column names, non-null counts, dtypes and memory usage):
survey.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   question   22 non-null     object
 1   item       22 non-null     object
 2   total      22 non-null     object
 3   age_14_29  22 non-null     object
 4   age_30_49  22 non-null     object
 5   age_50_x   22 non-null     object
 6   female     22 non-null     object
 7   male       22 non-null     object
dtypes: object(8)
memory usage: 1.5+ KB


In [5]:
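# All 22 rows are non-null in every column, so we have no null values in our dataframe.
# Note that the percentage columns are stored as strings (dtype object), e.g. '33%'.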

In [6]:
# We now want to upload the dataframe as a table to the database on the server.

# Import sql_functions.py because we need some functions from that module:
import sql_functions as sqlf

# If sql_functions.py has been changed since it was first imported, we need to restart the kernel and rerun the notebook so that the changes are picked up.

In [7]:
# Create a variable called engine using the get_engine function from our sql_functions module.
# NOTE(review): get_engine is defined outside this notebook; the upload cell below checks
# engine against None, so it presumably returns None when no engine can be created — confirm.
engine = sqlf.get_engine()

In [8]:
# Target schema in the database; we set it to our course name:
schema = 'hh_analytics_23_4'

# Name of the table the survey dataframe is written to
# (our group name + the name of the dataframe):
table_name = 'organic_food_consumer_survey'

In [9]:
# We need psycopg2 so that we can catch possible database errors during the upload:
import psycopg2

In [10]:
# Write the records stored in the dataframe to a table in the SQL database:
if engine is not None:
    try:
        survey.to_sql(name=table_name,      # name of the target SQL table
                      con=engine,           # engine or connection
                      schema=schema,        # our class schema variable
                      if_exists='replace',  # drop the table first if it already exists
                      index=False,          # do NOT write the DataFrame index as a column
                      chunksize=5000,       # number of rows written per batch
                      method='multi')       # pass multiple values in a single INSERT clause
        print(f"The {table_name} table was imported successfully.")
    # Error handling: report the problem instead of raising, and invalidate the engine.
    # psycopg2.DatabaseError is already covered by Exception; it is listed to make the
    # expected failure type explicit.
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # After a failure, rerunning this cell prints 'No engine' until the engine cell is rerun.
        engine = None
else:
    print('No engine')

The organic_food_consumer_survey table was imported successfully.


In [12]:
# Test: query the newly created table to count its rows
# (we know from above that the dataframe has 22 rows).
# The table name comes from the same variable used for the upload, so the check
# always targets the table that was just written:
sqlf.get_dataframe(f'SELECT COUNT(*) FROM {schema}.{table_name};')

Unnamed: 0,count
0,22


In [None]:
# Worked! The table contains 22 rows, which matches the number of rows in the dataframe.