In [16]:
# Import pandas, the library we use for data preparation:
import pandas as pd

In [17]:
# Load the consumer survey data from its CSV file into a dataframe:
survey = pd.read_csv(filepath_or_buffer='data/consumer_survey.csv')

In [18]:
# Display the dataframe as the cell's output (the rendering below is abbreviated with a "..." row):
survey

Unnamed: 0,question,item,group,percent_yes
0,Current frequency of buying organic foods,Exclusively,total,3
1,Current frequency of buying organic foods,Exclusively,age_14-29,3
2,Current frequency of buying organic foods,Exclusively,age_30-49,3
3,Current frequency of buying organic foods,Exclusively,age_50+,3
4,Current frequency of buying organic foods,Exclusively,female,4
...,...,...,...,...
127,Reasons for buying organic foods,Products as natural as possible,age_14-29,87
128,Reasons for buying organic foods,Products as natural as possible,age_30-49,88
129,Reasons for buying organic foods,Products as natural as possible,age_50+,92
130,Reasons for buying organic foods,Products as natural as possible,female,90


In [19]:
# Print a summary of the dataframe: index range, columns, non-null counts, dtypes and memory usage:
survey.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 132 entries, 0 to 131
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   question     132 non-null    object
 1   item         132 non-null    object
 2   group        132 non-null    object
 3   percent_yes  132 non-null    int64 
dtypes: int64(1), object(3)
memory usage: 4.2+ KB


In [20]:
# Every column reports 132 non-null values for 132 rows, so the dataframe has no null values.

In [21]:
# We now want to upload the dataframe to the database on the server.

In [22]:
# Import sql_functions.py because we need some functions from that module:
import sql_functions as sqlf

# Note: if sql_functions.py has been changed since it was first imported, restart the kernel and rerun the notebook up to this point so the changes take effect.

In [23]:
# Obtain the database engine from the project helper sqlf.get_engine().
# NOTE(review): the upload cell further down checks `engine` against None, so
# get_engine() presumably returns None when no connection can be set up — confirm in sql_functions.py.
engine = sqlf.get_engine()

In [24]:
# Name of the target table in the database (same name as the source CSV dataset):
table_name = 'consumer_survey'

# Database schema into which the table is written:
schema = 'capstone_organicfood'

In [25]:
# We need psycopg2 so that its DatabaseError class can be named in the error handling below:
import psycopg2

In [26]:
# Write the records stored in the dataframe to the SQL database.
# If no engine is available, the upload is skipped and a notice is printed instead.
if engine is not None:  # identity check: the idiomatic and safe way to test for None
    try:
        survey.to_sql(
            name=table_name,      # name of the target SQL table
            con=engine,           # engine or connection
            schema=schema,        # schema that holds the table
            if_exists='replace',  # drop an existing table before inserting the new values
            index=False,          # do NOT write the DataFrame index as a column
            chunksize=5000,       # number of rows written per batch
            method='multi',       # pass multiple rows in a single INSERT clause
        )
        print(f"The {table_name} table was imported successfully.")
    # Best-effort error handling: report the problem instead of raising, and reset
    # the engine so that rerunning this cell prints 'No engine' rather than retrying.
    # Note: Exception already covers psycopg2.DatabaseError; the latter is listed
    # only to make the expected database failure explicit.
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        engine = None
else:
    print('No engine')

The consumer_survey table was imported successfully.


In [27]:
# Test: count the rows of the newly created table. The dataframe summary above
# reports 132 entries, so the query should return 132.
# The table is referenced through `table_name` (not a hard-coded literal) so this
# check always targets the same table that the upload cell wrote to.
sqlf.get_dataframe(f'SELECT COUNT(*) FROM {schema}.{table_name};')

Unnamed: 0,count
0,132


In [28]:
# Worked!