# Sentiment Analysis - SQLAlchemy

## A. Load Libraries


In [2]:
import pandas as pd
import numpy as np
import csv
from sqlalchemy import Column, String, Integer, ForeignKey, DateTime, func
from sqlalchemy.orm import relationship, backref
from sqlalchemy.ext.declarative import declarative_base

## B. Load 6 CSV files

In [3]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
df_Conversation = pd.read_csv(r"C:\Programming\CustomerIntention\src\data\Conversation.csv", encoding='utf-8')

In [None]:
df_Conversation.head()

In [5]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
df_Conversation_Information = pd.read_csv(r"C:\Programming\CustomerIntention\src\data\Conversation_Information.csv", encoding='utf-8')

In [None]:
df_Conversation_Information.head()

In [7]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
df_Customer = pd.read_csv(r"C:\Programming\CustomerIntention\src\data\Customer.csv", encoding='utf-8')

In [None]:
df_Customer.head()

In [9]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
df_Fan_Page = pd.read_csv(r"C:\Programming\CustomerIntention\src\data\Fan_Page.csv", encoding='utf-8')

In [None]:
df_Fan_Page.head()

In [68]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
df_Conversation_Intention = pd.read_csv(r"C:\Programming\CustomerIntention\src\data\Conversation_Intention.csv", encoding='utf-8')

In [None]:
df_Conversation_Intention.head()

In [70]:
# Raw string: the Windows path contains backslashes that must not be read as escape sequences.
df_Conversation_Entities = pd.read_csv(r"C:\Programming\CustomerIntention\src\data\Conversation_Entities.csv", encoding='utf-8')

In [None]:
df_Conversation_Entities.head()

## Design tables with SQL Alchemy (demo only)

### Import SQL Alchemy

import sqlalchemy 
sqlalchemy.__version__ 

## C. Create an engine to access the localhost created in the Command Prompt run as administrator

In [11]:
from sqlalchemy import create_engine
# Engine for the local MySQL database 'customerintention' (mysqldb driver, utf8mb4 charset).
# echo=True makes the engine log every SQL statement it emits (the INFO lines in the cell outputs).
# NOTE(review): the URL embeds the database user name and password in plain text -
# consider loading them from the environment before sharing this notebook.
engine = create_engine('mysql+mysqldb://phuongdaingo:0505@localhost:3306/customerintention?charset=utf8mb4', echo=True) 

## D. Design 6 tables 'Conversation', 'Conversation_Information', 'Customer', 'Fan_Page', 'Conversation_Intention', 'Conversation_Entities'

For reference only, other ways of setting a default DateTime:
https://stackoverflow.com/questions/13370317/sqlalchemy-default-datetime

Now let's execute this cell to start creating tables for TablePlus.

In [75]:
from sqlalchemy import Column, String, Integer, ForeignKey, DateTime, func, Boolean, MetaData, Table, Float
from sqlalchemy.dialects.mysql import TINYINT
from sqlalchemy.orm import relationship, backref
from sqlalchemy.ext.declarative import declarative_base
# Declarative base class that the six model classes below inherit from.
Base = declarative_base() 
# Stand-alone MetaData bound to the engine.
# NOTE(review): this name is not referenced again in the visible part of the file
# (the models use Base.metadata) - confirm whether it is still needed.
metadata = MetaData(bind=engine) 

class Conversation(Base):
    """ORM mapping for the existing ``conversation`` table.

    The table is created and maintained in the database (TablePlus); this
    class only maps it so rows can be queried and inserted from Python.
    """

    # ``__tablename__`` must be the table name as a plain string.  The columns
    # are declared explicitly below, so no reflected ``Table`` object is needed.
    __tablename__ = 'conversation'

    # The mapper needs a primary key; the value itself is auto-incremented by the database.
    id = Column(Integer, primary_key=True)
    conversation_id = Column(Integer)
    message = Column(String())
    order = Column(Integer)
    sender = Column(Integer)
    fan_page_id = Column(Integer)
    cus_id = Column(Integer)
    
class Conversation_Information(Base):
    """ORM mapping for the existing ``conversation_information`` table.

    The table is created and maintained in the database (TablePlus); this
    class only maps it so rows can be queried and inserted from Python.
    """

    # ``__tablename__`` must be the table name as a plain string.  The columns
    # are declared explicitly below, so no reflected ``Table`` object is needed.
    __tablename__ = 'conversation_information'

    # The mapper needs a primary key; the value itself is auto-incremented by the database.
    id = Column(Integer, primary_key=True)
    conversation_id = Column(Integer)
    customer_count = Column(Integer)
    sales_count = Column(Integer)
    # Timezone-aware timestamps; see
    # https://stackoverflow.com/questions/13370317/sqlalchemy-default-datetime
    start_time = Column(DateTime(timezone=True))
    end_time = Column(DateTime(timezone=True))
    
class Customer(Base):
    """ORM mapping for the existing ``customer`` table.

    The table is created and maintained in the database (TablePlus); this
    class only maps it and must not be used to change the table structure.
    """

    # ``__tablename__`` must be the table name as a plain string.  The columns
    # are declared explicitly below, so no reflected ``Table`` object is needed.
    __tablename__ = 'customer'

    # The mapper needs a primary key; the value itself is auto-incremented by the database.
    id = Column(Integer, primary_key=True)
    cus_name = Column(String)
    cus_id = Column(Integer)
    
class Fan_Page(Base):
    """ORM mapping for the existing ``fan_page`` table.

    The table is created and maintained in the database (TablePlus); this
    class only maps it so rows can be queried and inserted from Python.
    """

    # ``__tablename__`` must be the table name as a plain string.  The columns
    # are declared explicitly below, so no reflected ``Table`` object is needed.
    __tablename__ = 'fan_page'

    # The mapper needs a primary key; the value itself is auto-incremented by the database.
    id = Column(Integer, primary_key=True)
    fan_page_name = Column(String)
    fan_page_id = Column(Integer)
    
class Conversation_Intention(Base):
    """ORM mapping for the existing ``conversation_intention`` table.

    The table is created and maintained in the database (TablePlus); this
    class only maps it so rows can be queried and inserted from Python.
    """

    # ``__tablename__`` must be the table name as a plain string.  The columns
    # are declared explicitly below, so no reflected ``Table`` object is needed.
    __tablename__ = 'conversation_intention'

    # The mapper needs a primary key; the value itself is auto-incremented by the database.
    id = Column(Integer, primary_key=True)
    conversation_id = Column(Integer)
    intention_label = Column(String)
    intention_score = Column(Float)
    
class Conversation_Entities(Base):
    """ORM mapping for the existing ``conversation_entities`` table.

    The table is created and maintained in the database (TablePlus); this
    class only maps it so rows can be queried and inserted from Python.
    """

    # ``__tablename__`` must be the table name as a plain string.  The columns
    # are declared explicitly below, so no reflected ``Table`` object is needed.
    __tablename__ = 'conversation_entities'

    # The mapper needs a primary key; the value itself is auto-incremented by the database.
    id = Column(Integer, primary_key=True)
    conversation_id = Column(Integer)
    conversation_entity = Column(String)
    conversation_entity_score = Column(Float)
    conversation_entity_word = Column(String)
    
# The classes above only map tables that already exist in the database.
# Tables are created in TablePlus, not from Python, which is why
# Base.metadata.create_all(engine) stays disabled at the end of this cell.
from sqlalchemy.orm import sessionmaker
# Equivalent two-step form, kept for reference:
#Session = sessionmaker()
#Session.configure(bind=engine)
Session = sessionmaker(bind=engine) # session factory bound to the engine; queries need a session
session = Session() # the session instance used by the query and insert cells below
#Base.metadata.create_all(engine)

### Print the current row of each table

In [63]:
conversation_results = session.query(Conversation)
conversation_results # displays a Query object (see the output below), not a list; rows are fetched when it is iterated

<sqlalchemy.orm.query.Query at 0x20293831100>

In [None]:
for conversation in conversation_results: # each item of the object
    print(conversation.message)

In [65]:
conversation_information_results = session.query(Conversation_Information)
conversation_information_results # displays a Query object (see the output below), not a list; rows are fetched when it is iterated

<sqlalchemy.orm.query.Query at 0x20277b926a0>

In [None]:
for conversation_information in conversation_information_results: 
    print(conversation_information.customer_count)

In [15]:
customer_results = session.query(Customer)
customer_results # displays a Query object (see the output below), not a list; rows are fetched when it is iterated

<sqlalchemy.orm.query.Query at 0x214be973550>

In [None]:
for customer in customer_results:
    print(customer.cus_name)

In [69]:
fan_page_results = session.query(Fan_Page)
fan_page_results # displays a Query object (see the output below), not a list; rows are fetched when it is iterated

<sqlalchemy.orm.query.Query at 0x20293825400>

In [None]:
for fan_page in fan_page_results: 
    print(fan_page.fan_page_name)

### Add new row(s) to the database

https://docs.sqlalchemy.org/en/14/tutorial/data_insert.html

ID is set as Primary Key which is auto-increment in TablePlus so I will not try to add a new row having ID in this code cell.

In [129]:
# Insert one sample row into each of four tables inside a single transaction.
try:
    # No ``id`` is passed: the primary key is auto-incremented by the database.
    # Integer columns receive ints rather than numeric strings.
    newConversation = Conversation(
        message='Could I ask you something?',
        order=0,
        sender=0,
    )
    session.add(newConversation)

    newConversationInformation = Conversation_Information(
        conversation_id=1,
        customer_count=1,
        sales_count=0,
    )
    session.add(newConversationInformation)

    newCustomer = Customer(
        cus_name='Frank',
        cus_id=3,
    )
    session.add(newCustomer)

    newFanPage = Fan_Page(
        fan_page_name='DEF',
        fan_page_id=2,
    )
    session.add(newFanPage)

    session.commit()  # a single commit: the four inserts succeed or fail together
except Exception as exc:
    # Undo the partial transaction, but report the failure instead of hiding it.
    session.rollback()
    print('Insert failed, transaction rolled back: {!r}'.format(exc))

2021-09-15 00:56:40,618 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2021-09-15 00:56:40,624 INFO sqlalchemy.engine.Engine INSERT INTO conversation (message, `order`, sender) VALUES (%s, %s, %s)
2021-09-15 00:56:40,625 INFO sqlalchemy.engine.Engine [generated in 0.00196s] ('Could I ask you something?', '0', '0')
2021-09-15 00:56:40,633 INFO sqlalchemy.engine.Engine INSERT INTO conversation_information (conversation_id, customer_count, sales_count, start_time, end_time) VALUES (%s, %s, %s, now(), now())
2021-09-15 00:56:40,634 INFO sqlalchemy.engine.Engine [generated in 0.00101s] ('1', '1', '0')
2021-09-15 00:56:40,639 INFO sqlalchemy.engine.Engine INSERT INTO customer (cus_name, cus_id) VALUES (%s, %s)
2021-09-15 00:56:40,640 INFO sqlalchemy.engine.Engine [cached since 7943s ago] ('Frank', '3')
2021-09-15 00:56:40,644 INFO sqlalchemy.engine.Engine INSERT INTO fan_page (fan_page_name, fan_page_id) VALUES (%s, %s)
2021-09-15 00:56:40,645 INFO sqlalchemy.engine.Engine [generated in 0.00

In [None]:
# Insert one more sample row into conversation_information.
try:
    # Integer columns receive ints rather than numeric strings.
    newConversationInformation = Conversation_Information(
        conversation_id=2,
        customer_count=2,
        sales_count=2,
    )
    session.add(newConversationInformation)

    session.commit()
except Exception as exc:
    # Undo the failed transaction, but report the failure instead of hiding it.
    session.rollback()
    print('Insert failed, transaction rolled back: {!r}'.format(exc))

In [12]:
df_Conversation.shape

(4024005, 8)

### Print them out again to see the differences

In [133]:
# Re-query the conversation table: the loop in the following cell prints conversation messages.
conversation_results = session.query(Conversation)
conversation_results  # displays a Query object, not a list; rows are fetched when it is iterated

<sqlalchemy.orm.query.Query at 0x25afa969a90>

In [None]:
for conversation in conversation_results:
    print(conversation.message)

In [135]:
conversation_information_results = session.query(Conversation_Information)
conversation_information_results # displays a Query object (see the output below), not a list; rows are fetched when it is iterated

<sqlalchemy.orm.query.Query at 0x25afa9694f0>

In [None]:
for conversation_information in conversation_information_results:
    print(conversation_information.customer_count)

In [40]:
customer_results = session.query(Customer)
customer_results # displays a Query object (see the output below), not a list; rows are fetched when it is iterated

<sqlalchemy.orm.query.Query at 0x20277ba6df0>

In [None]:
for customer in customer_results: 
    print(customer.cus_name)

In [139]:
fan_page_results = session.query(Fan_Page)
fan_page_results # displays a Query object (see the output below), not a list; rows are fetched when it is iterated

<sqlalchemy.orm.query.Query at 0x25afa969460>

In [None]:
for fan_page in fan_page_results: 
    print(fan_page.fan_page_name)

## E. Insert all rows of each dataframe to database's tables in TablePlus

New Method: inserting directly from data frames

Adding an index (IDX) to one or more columns makes inserting slower but selecting and filtering faster, and vice versa. Which columns to index therefore depends on whether the relational database is used mainly for storing the data or for reading it.

We add the dataframe rows to the session in batches and then commit to persist them in the relational database. If flush()/commit() is placed only outside the 'for' loop, a single error stops everything: the failing batch stays in the session and no later batch is saved. If flush()/commit() is placed inside the 'for' loop, each batch is written on its own, so the other batches are saved regardless of any error that might occur. In that case rollback() must be called in the 'except' branch to remove from the session the pending rows of the batch that caused the error.

### Insert 'df_Conversation' dataframe into 'conversation' database

In [None]:
import time
#import mysql.connector # as below mysql, not sqlite3 for this case
import traceback
from tqdm import tqdm
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String,  create_engine # use sqlalchemy with connection string for mysql
from sqlalchemy.orm import scoped_session, sessionmaker
import unicodedata 

# Module-level state shared by the loader defined in this cell.
engine = None  # assigned by init_sqlalchemy()
# scoped_session() provides a thread-managed registry of Session objects, so a
# single global name can safely stand for a transactional session that is
# local to the current thread (the usual pattern in web applications).
DBSession = scoped_session(sessionmaker())
Base = declarative_base()


def init_sqlalchemy(dbname='mysql+mysqldb://phuongdaingo:0505@localhost:3306/customerintention?charset=utf8mb4'):
    """Create the global engine for ``dbname`` and bind ``DBSession`` to it.

    The session is configured with ``autoflush=False`` and
    ``expire_on_commit=False``.  Afterwards every table registered on
    ``Base.metadata`` is dropped and re-created.

    NOTE(review): in the visible code no model is declared on the ``Base``
    created just above, so the drop/create calls appear to have nothing to
    act on - confirm.  The default URL embeds the user name and password in
    plain text.

    Args:
        dbname: SQLAlchemy connection URL.
    """
    global engine
    engine = create_engine(dbname, echo=False)
    DBSession.remove()  # discard the current thread's session before reconfiguring
    session_options = dict(bind=engine, autoflush=False, expire_on_commit=False)
    DBSession.configure(**session_options)
    schema = Base.metadata
    schema.drop_all(engine)
    schema.create_all(engine)

def conversation_sqlalchemy_orm(n=100000):
    """Insert the first ``n`` rows of ``df_Conversation`` into the ``conversation`` table.

    Each row is committed in its own transaction: a row that fails is rolled
    back, its traceback is printed and its index is recorded, while rows
    already committed are unaffected.  This isolates errors to single rows at
    the cost of speed.

    Args:
        n: Number of leading rows of ``df_Conversation`` to insert.  Values
            larger than the dataframe are clamped to its length.

    Returns:
        list: Positional indices of the rows whose insert failed.
    """
    init_sqlalchemy()
    t0 = time.time()
    # Clamp so that an oversized n does not raise one IndexError per missing row.
    n = min(n, len(df_Conversation))
    error_i_list = []  # positional indices of rows that could not be inserted
    for i in tqdm(range(n)):  # tqdm shows a progress bar
        try:
            conversation = Conversation()
            conversation.order = df_Conversation['Order'].iloc[i]
            conversation.sender = df_Conversation['Sender'].iloc[i]
            # NFC-normalise so that equivalent Unicode sequences are stored identically.
            # NOTE(review): str() turns a missing (NaN) message into the text 'nan' - confirm this is wanted.
            conversation.message = unicodedata.normalize('NFC', str(df_Conversation['Message'].iloc[i]))
            conversation.fan_page_id = int(df_Conversation['Fanpage'].iloc[i])
            conversation.cus_id = df_Conversation['PSID'].iloc[i]
            # NOTE(review): the model's conversation_id column is not populated here - confirm.
            DBSession.add(conversation)
            # commit() flushes the pending row first, so no separate flush or
            # periodic batch commit is needed after it.
            DBSession.commit()
        except Exception:
            print('Error at index {}: '.format(i))
            print(traceback.format_exc())
            print('-' * 20)
            DBSession.rollback()  # discard the failed row so the session stays usable
            error_i_list.append(i)

    print(
        "Conversation's SQLAlchemy ORM: Total time for " + str(n) +
        " records " + str(time.time() - t0) + " secs")
    return error_i_list

    # A new function to select rows from conversations with a condition filtering by cus_id, joining with table 'customer' to return the cus_name
#def join_tables():

# Load every row of df_Conversation when this cell runs as the main module.
if __name__ == '__main__':
    conversation_sqlalchemy_orm(n=len(df_Conversation))

### Insert 'df_Conversation_Information' dataframe into 'conversation_information' database

Inserting fails with an error at index 260000 (Duplicate entry '250001' for key 'IDX_conversation_id'), so I had to remove this IDX on Conversation ID in MySQL by creating a new table for Conversation there.

In [32]:
import time
#import mysql.connector # as below mysql, not sqlite3 for this case
import traceback
from tqdm import tqdm
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String,  create_engine # use sqlalchemy with connection string for mysql
from sqlalchemy.orm import scoped_session, sessionmaker

# Module-level state shared by the loader defined in this cell.
engine = None  # assigned by init_sqlalchemy()
# scoped_session() provides a thread-managed registry of Session objects, so a
# single global name can safely stand for a transactional session that is
# local to the current thread (the usual pattern in web applications).
DBSession = scoped_session(sessionmaker())
Base = declarative_base()


def init_sqlalchemy(dbname='mysql+mysqldb://phuongdaingo:0505@localhost:3306/customerintention?charset=utf8mb4'):
    """Create the global engine for ``dbname`` and bind ``DBSession`` to it.

    The session is configured with ``autoflush=False`` and
    ``expire_on_commit=False``.  Afterwards every table registered on
    ``Base.metadata`` is dropped and re-created.

    NOTE(review): in the visible code no model is declared on the ``Base``
    created just above, so the drop/create calls appear to have nothing to
    act on - confirm.  The default URL embeds the user name and password in
    plain text.

    Args:
        dbname: SQLAlchemy connection URL.
    """
    global engine
    engine = create_engine(dbname, echo=False)
    DBSession.remove()  # discard the current thread's session before reconfiguring
    session_options = dict(bind=engine, autoflush=False, expire_on_commit=False)
    DBSession.configure(**session_options)
    schema = Base.metadata
    schema.drop_all(engine)
    schema.create_all(engine)

def conversation_information_sqlalchemy_orm(n=100000, batch_size=10000):
    """Insert the first ``n`` rows of ``df_Conversation_Information`` in batches.

    Rows are added to the session and committed whenever the row index is a
    multiple of ``batch_size``; a final commit stores the rows that follow the
    last batch boundary.  When anything fails, the traceback is printed and
    the session is rolled back, which discards every row still pending since
    the last successful commit; only the index at which the failure surfaced
    is recorded.

    Args:
        n: Number of leading dataframe rows to insert.  Values larger than
            the dataframe are clamped to its length.
        batch_size: Number of rows between commits; must be at least 1.

    Returns:
        list: Indices at which a failure was reported.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    init_sqlalchemy()
    t0 = time.time()
    # Clamp so that an oversized n does not raise one IndexError per missing row.
    n = min(n, len(df_Conversation_Information))
    error_i_list = []  # indices at which a failure was reported

    def _record_failure(index):
        # Print the active traceback, drop everything pending and remember the index.
        print('Error at index {}: '.format(index))
        print(traceback.format_exc())
        print('-' * 20)
        DBSession.rollback()
        error_i_list.append(index)

    for i in tqdm(range(n)):  # tqdm shows a progress bar
        try:
            conversation_information = Conversation_Information()
            conversation_information.conversation_id = df_Conversation_Information['Conversation_ID'].iloc[i]
            conversation_information.customer_count = df_Conversation_Information['CustomerCount'].iloc[i]
            conversation_information.sales_count = df_Conversation_Information['SalesCount'].iloc[i]
            conversation_information.start_time = df_Conversation_Information['StartTime'].iloc[i]
            conversation_information.end_time = df_Conversation_Information['EndTime'].iloc[i]

            DBSession.add(conversation_information)
            if i % batch_size == 0:
                DBSession.commit()  # commit() flushes the pending batch first
        except Exception:
            _record_failure(i)

    # Rows added after the last batch boundary are still pending; without this
    # commit they would never reach the database.
    try:
        DBSession.commit()
    except Exception:
        _record_failure(n - 1)

    print(
        "Conversation_Information's SQLAlchemy ORM: Total time for " + str(n) +
        " records " + str(time.time() - t0) + " secs")
    return error_i_list

    # A new function to select rows from conversations with a condition filtering by cus_id, joining with table 'customer' to return the cus_name
#def join_tables():

# Load every row of df_Conversation_Information when this cell runs as the main module.
if __name__ == '__main__':
    conversation_information_sqlalchemy_orm(n=len(df_Conversation_Information))

 84%|████████████████████████████████████████████████████████████▏           | 263006/314543 [05:06<00:12, 4210.42it/s]

Error at index 260000: 
Traceback (most recent call last):
  File "C:\Programming\CustomerIntention\venv\lib\site-packages\sqlalchemy\engine\base.py", line 1771, in _execute_context
    self.dialect.do_execute(
  File "C:\Programming\CustomerIntention\venv\lib\site-packages\sqlalchemy\engine\default.py", line 717, in do_execute
    cursor.execute(statement, parameters)
  File "C:\Programming\CustomerIntention\venv\lib\site-packages\MySQLdb\cursors.py", line 206, in execute
    res = self._query(query)
  File "C:\Programming\CustomerIntention\venv\lib\site-packages\MySQLdb\cursors.py", line 319, in _query
    db.query(q)
  File "C:\Programming\CustomerIntention\venv\lib\site-packages\MySQLdb\connections.py", line 259, in query
    _mysql.connection.query(self, query)
MySQLdb._exceptions.IntegrityError: (1062, "Duplicate entry '250001' for key 'IDX_conversation_id'")

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users

100%|█████████████████████████████████████████████████████████████████████████| 314543/314543 [06:07<00:00, 856.30it/s]

Conversation_Information's SQLAlchemy ORM: Total time for 314543 records 367.33158469200134 secs





### Insert 'df_Fan_Page' dataframe into 'fan_page' database

In [54]:
import time
#import mysql.connector # as below mysql, not sqlite3 for this case
import traceback
from tqdm import tqdm
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String,  create_engine # use sqlalchemy with connection string for mysql
from sqlalchemy.orm import scoped_session, sessionmaker
import unicodedata 

# Module-level state shared by the loader defined in this cell.
engine = None  # assigned by init_sqlalchemy()
# scoped_session() provides a thread-managed registry of Session objects, so a
# single global name can safely stand for a transactional session that is
# local to the current thread (the usual pattern in web applications).
DBSession = scoped_session(sessionmaker())
Base = declarative_base()


def init_sqlalchemy(dbname='mysql+mysqldb://phuongdaingo:0505@localhost:3306/customerintention?charset=utf8mb4'):
    """Create the global engine for ``dbname`` and bind ``DBSession`` to it.

    The session is configured with ``autoflush=False`` and
    ``expire_on_commit=False``.  Afterwards every table registered on
    ``Base.metadata`` is dropped and re-created.

    NOTE(review): in the visible code no model is declared on the ``Base``
    created just above, so the drop/create calls appear to have nothing to
    act on - confirm.  The default URL embeds the user name and password in
    plain text.

    Args:
        dbname: SQLAlchemy connection URL.
    """
    global engine
    engine = create_engine(dbname, echo=False)
    DBSession.remove()  # discard the current thread's session before reconfiguring
    session_options = dict(bind=engine, autoflush=False, expire_on_commit=False)
    DBSession.configure(**session_options)
    schema = Base.metadata
    schema.drop_all(engine)
    schema.create_all(engine)
    
def fan_page_sqlalchemy_orm(n=100000, batch_size=10000):
    """Insert the first ``n`` rows of ``df_Fan_Page`` into the ``fan_page`` table in batches.

    Rows are added to the session and committed whenever the row index is a
    multiple of ``batch_size``; a final commit stores the rows that follow the
    last batch boundary.  When anything fails, the traceback is printed and
    the session is rolled back, which discards every row still pending since
    the last successful commit; only the index at which the failure surfaced
    is recorded.

    Args:
        n: Number of leading dataframe rows to insert.  Values larger than
            the dataframe are clamped to its length.
        batch_size: Number of rows between commits; must be at least 1.

    Returns:
        list: Indices at which a failure was reported.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    init_sqlalchemy()
    t0 = time.time()
    # Clamp so that an oversized n does not raise one IndexError per missing row.
    n = min(n, len(df_Fan_Page))
    error_i_list = []  # indices at which a failure was reported

    def _record_failure(index):
        # Print the active traceback, drop everything pending and remember the index.
        print('Error at index {}: '.format(index))
        print(traceback.format_exc())
        print('-' * 20)
        DBSession.rollback()
        error_i_list.append(index)

    for i in tqdm(range(n)):  # tqdm shows a progress bar
        try:
            fan_page = Fan_Page()
            # NFC-normalise so that equivalent Unicode sequences are stored identically.
            fan_page.fan_page_name = unicodedata.normalize('NFC', str(df_Fan_Page['FanpageName'].iloc[i]))
            fan_page.fan_page_id = df_Fan_Page['Fanpage'].iloc[i]

            DBSession.add(fan_page)
            if i % batch_size == 0:
                DBSession.commit()  # commit() flushes the pending batch first
        except Exception:
            _record_failure(i)

    # Rows added after the last batch boundary are still pending; without this
    # commit they would never reach the database.
    try:
        DBSession.commit()
    except Exception:
        _record_failure(n - 1)

    print(
        "Fan_Page's SQLAlchemy ORM: Total time for " + str(n) +
        " records " + str(time.time() - t0) + " secs")
    return error_i_list

    # A new function to select rows from conversations with a condition filtering by cus_id, joining with table 'customer' to return the cus_name
#def join_tables():

# Load every row of df_Fan_Page when this cell runs as the main module.
if __name__ == '__main__':
    fan_page_sqlalchemy_orm(n=len(df_Fan_Page))

100%|█████████████████████████████████████████████████████████████████████████| 314543/314543 [05:56<00:00, 883.28it/s]

Fan_Page's SQLAlchemy ORM: Total time for 314543 records 356.1085169315338 secs





### Insert 'df_Customer' dataframe into 'customer' database

In [22]:
import time
#import mysql.connector # as below mysql, not sqlite3 for this case
import traceback
from tqdm import tqdm
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String,  create_engine # use sqlalchemy with connection string for mysql
from sqlalchemy.orm import scoped_session, sessionmaker
import unicodedata 

# Module-level state shared by the loader defined in this cell.
engine = None  # assigned by init_sqlalchemy()
# scoped_session() provides a thread-managed registry of Session objects, so a
# single global name can safely stand for a transactional session that is
# local to the current thread (the usual pattern in web applications).
DBSession = scoped_session(sessionmaker())
Base = declarative_base()


def init_sqlalchemy(dbname='mysql+mysqldb://phuongdaingo:0505@localhost:3306/customerintention?charset=utf8mb4'):
    """Create the global engine for ``dbname`` and bind ``DBSession`` to it.

    The session is configured with ``autoflush=False`` and
    ``expire_on_commit=False``.  Afterwards every table registered on
    ``Base.metadata`` is dropped and re-created.

    NOTE(review): in the visible code no model is declared on the ``Base``
    created just above, so the drop/create calls appear to have nothing to
    act on - confirm.  The default URL embeds the user name and password in
    plain text.

    Args:
        dbname: SQLAlchemy connection URL.
    """
    global engine
    engine = create_engine(dbname, echo=False)
    DBSession.remove()  # discard the current thread's session before reconfiguring
    session_options = dict(bind=engine, autoflush=False, expire_on_commit=False)
    DBSession.configure(**session_options)
    schema = Base.metadata
    schema.drop_all(engine)
    schema.create_all(engine)

def customer_sqlalchemy_orm(n=100000):
    """Insert the first ``n`` rows of ``df_Customer`` into the ``customer`` table.

    Each row is committed in its own transaction: a row that fails is rolled
    back, its traceback is printed and its index is recorded, while rows
    already committed are unaffected.  This isolates errors to single rows at
    the cost of speed.

    Args:
        n: Number of leading rows of ``df_Customer`` to insert.  Values
            larger than the dataframe are clamped to its length.

    Returns:
        list: Positional indices of the rows whose insert failed.
    """
    init_sqlalchemy()
    t0 = time.time()
    # Clamp so that an oversized n does not raise one IndexError per missing row.
    n = min(n, len(df_Customer))
    error_i_list = []  # positional indices of rows that could not be inserted
    for i in tqdm(range(n)):  # tqdm shows a progress bar
        try:
            customer = Customer()
            # NFC-normalise so that equivalent Unicode sequences are stored identically.
            # NOTE(review): str() turns a missing (NaN) name into the text 'nan' - confirm this is wanted.
            customer.cus_name = unicodedata.normalize('NFC', str(df_Customer['CusName'].iloc[i]))
            customer.cus_id = df_Customer['PSID'].iloc[i]
            DBSession.add(customer)
            # commit() flushes the pending row first, so no separate flush or
            # periodic batch commit is needed after it.
            DBSession.commit()
        except Exception:
            print('Error at index {}: '.format(i))
            print(traceback.format_exc())
            print('-' * 20)
            DBSession.rollback()  # discard the failed row so the session stays usable
            error_i_list.append(i)

    print(
        "Customer's SQLAlchemy ORM: Total time for " + str(n) +
        " records " + str(time.time() - t0) + " secs")
    return error_i_list

    # A new function to select rows from conversations with a condition filtering by cus_id, joining with table 'customer' to return the cus_name
#def join_tables():

# Load every row of df_Customer when this cell runs as the main module.
if __name__ == '__main__':
    customer_sqlalchemy_orm(n=len(df_Customer))

100%|█████████████████████████████████████████████████████████████████████████| 314543/314543 [37:00<00:00, 141.67it/s]

Customer's SQLAlchemy ORM: Total time for 314543 records 2220.204922914505 secs





### Insert 'Conversation_Intention' dataframe into 'conversation_intention' database

In [None]:
import time
#import mysql.connector # as below mysql, not sqlite3 for this case
import traceback
from tqdm import tqdm
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String,  create_engine # use sqlalchemy with connection string for mysql
from sqlalchemy.orm import scoped_session, sessionmaker
import unicodedata 

# Module-level state shared by the loader defined in this cell.
engine = None  # assigned by init_sqlalchemy()
# scoped_session() provides a thread-managed registry of Session objects, so a
# single global name can safely stand for a transactional session that is
# local to the current thread (the usual pattern in web applications).
DBSession = scoped_session(sessionmaker())
Base = declarative_base()


def init_sqlalchemy(dbname='mysql+mysqldb://phuongdaingo:0505@localhost:3306/customerintention?charset=utf8mb4'):
    """Create the global engine for ``dbname`` and bind ``DBSession`` to it.

    The session is configured with ``autoflush=False`` and
    ``expire_on_commit=False``.  Afterwards every table registered on
    ``Base.metadata`` is dropped and re-created.

    NOTE(review): in the visible code no model is declared on the ``Base``
    created just above, so the drop/create calls appear to have nothing to
    act on - confirm.  The default URL embeds the user name and password in
    plain text.

    Args:
        dbname: SQLAlchemy connection URL.
    """
    global engine
    engine = create_engine(dbname, echo=False)
    DBSession.remove()  # discard the current thread's session before reconfiguring
    session_options = dict(bind=engine, autoflush=False, expire_on_commit=False)
    DBSession.configure(**session_options)
    schema = Base.metadata
    schema.drop_all(engine)
    schema.create_all(engine)
    
def conversation_intention_sqlalchemy_orm(n=100000, batch_size=10000):
    """Insert the first ``n`` rows of ``df_Conversation_Intention`` in batches.

    Rows are added to the session and committed whenever the row index is a
    multiple of ``batch_size``; a final commit stores the rows that follow the
    last batch boundary.  When anything fails, the traceback is printed and
    the session is rolled back, which discards every row still pending since
    the last successful commit; only the index at which the failure surfaced
    is recorded.

    Args:
        n: Number of leading dataframe rows to insert.  Values larger than
            the dataframe are clamped to its length.
        batch_size: Number of rows between commits; must be at least 1.

    Returns:
        list: Indices at which a failure was reported.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    init_sqlalchemy()
    t0 = time.time()
    # Clamp so that an oversized n does not raise one IndexError per missing row.
    n = min(n, len(df_Conversation_Intention))
    error_i_list = []  # indices at which a failure was reported

    def _record_failure(index):
        # Print the active traceback, drop everything pending and remember the index.
        print('Error at index {}: '.format(index))
        print(traceback.format_exc())
        print('-' * 20)
        DBSession.rollback()
        error_i_list.append(index)

    for i in tqdm(range(n)):  # tqdm shows a progress bar
        try:
            conversation_intention = Conversation_Intention()
            # The mapped column is ``conversation_id``; assigning to any other
            # attribute name would never be written to the table.
            conversation_intention.conversation_id = df_Conversation_Intention['Conversation_ID'].iloc[i]
            # NFC-normalise so that equivalent Unicode sequences are stored identically.
            conversation_intention.intention_label = unicodedata.normalize('NFC', str(df_Conversation_Intention['Intention_Label'].iloc[i]))
            # NOTE(review): the score is read from the 'Fanpage' column, which looks like a
            # copy-paste slip - confirm the correct source column in Conversation_Intention.csv.
            conversation_intention.intention_score = df_Conversation_Intention['Fanpage'].iloc[i]

            DBSession.add(conversation_intention)
            if i % batch_size == 0:
                DBSession.commit()  # commit() flushes the pending batch first
        except Exception:
            _record_failure(i)

    # Rows added after the last batch boundary are still pending; without this
    # commit they would never reach the database.
    try:
        DBSession.commit()
    except Exception:
        _record_failure(n - 1)

    print(
        "Conversation_Intention's SQLAlchemy ORM: Total time for " + str(n) +
        " records " + str(time.time() - t0) + " secs")
    return error_i_list

    # A new function to select rows from conversations with a condition filtering by cus_id, joining with table 'customer' to return the cus_name
#def join_tables():

# Load every row of df_Conversation_Intention when this cell runs as the main module.
if __name__ == '__main__':
    conversation_intention_sqlalchemy_orm(n=len(df_Conversation_Intention))

### Insert 'Conversation_Entities' dataframe into 'conversation_entities' database

In [None]:
import time
#import mysql.connector # as below mysql, not sqlite3 for this case
import traceback
from tqdm import tqdm
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String,  create_engine # use sqlalchemy with connection string for mysql
from sqlalchemy.orm import scoped_session, sessionmaker
import unicodedata 

Base = declarative_base()
DBSession = scoped_session(sessionmaker()) # the scoped_session() function is provided which produces a thread-managed registry of Session objects. It is commonly used in web applications so that a single global variable can be used to safely represent transactional sessions with sets of objects, localized to a single thread.
engine = None

def init_sqlalchemy(dbname='mysql+mysqldb://phuongdaingo:0505@localhost:3306/customerintention?charset=utf8mb4'):
    """Create the global engine for *dbname*, rebind ``DBSession`` and rebuild the tables.

    The current scoped session is removed and reconfigured against the new
    engine, then every table registered on ``Base.metadata`` is dropped and
    created again.

    NOTE(review): the default URL embeds database credentials; consider
    loading them from configuration instead.
    """
    global engine
    engine = create_engine(dbname, echo=False)

    # Discard the current session before binding the registry to the new engine.
    DBSession.remove()
    session_options = {'bind': engine, 'autoflush': False, 'expire_on_commit': False}
    DBSession.configure(**session_options)

    # Destructive: existing tables known to Base.metadata are dropped before being recreated.
    schema = Base.metadata
    schema.drop_all(engine)
    schema.create_all(engine)
    
def conversation_entities_sqlalchemy_orm(n=100000, batch_size=10000):
    """Insert the first ``n`` rows of ``df_Conversation_Entities`` through the ORM.

    Rows are added to ``DBSession`` one at a time and committed every
    ``batch_size`` rows. The final, possibly partial, batch is committed on
    the last iteration, so no trailing rows are left uncommitted.

    Args:
        n: Number of leading dataframe rows to insert.
        batch_size: Number of rows per commit; must be at least 1.

    Returns:
        The list of row indices at which an exception was raised. On an
        error the session is rolled back, which discards everything added
        since the previous commit, not only the row at the recorded index.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    init_sqlalchemy()
    t0 = time.time()
    error_i_list = []  # indices at which an insert or commit failed
    last_index = n - 1

    for i in tqdm(range(n)):  # tqdm shows a progress bar
        try:
            conversation_entities = Conversation_Entities()
            conversation_entities.reference_id = df_Conversation_Entities['Conversation_ID'].iloc[i]
            # str() already yields text, so normalizing to NFC is the only conversion needed.
            conversation_entities.conversation_entity = unicodedata.normalize(
                'NFC', str(df_Conversation_Entities['Conversation_Entity'].iloc[i]))
            conversation_entities.conversation_entity_score = df_Conversation_Entities['Conversation_Entity_Score'].iloc[i]
            conversation_entities.conversation_entity_word = unicodedata.normalize(
                'NFC', str(df_Conversation_Entities['Conversation_Entity_word'].iloc[i]))

            DBSession.add(conversation_entities)
            # Commit once per full batch and once more on the last row so the
            # trailing partial batch is persisted; commit() flushes pending objects first.
            if (i + 1) % batch_size == 0 or i == last_index:
                DBSession.commit()
        except Exception:
            print('Error at index {}: '.format(i))
            print(traceback.format_exc())
            print('-' * 20)
            # Roll back so the failed batch does not block the batches that follow.
            DBSession.rollback()
            error_i_list.append(i)

    print(
        "Conversation_Entities's SQLAlchemy ORM: Total time for " + str(n) +
        " records " + str(time.time() - t0) + " secs")
    return error_i_list

    # A new function to select rows from conversations with a condition filtering by cus_id, joining with table 'customer' to return the cus_name
#def join_tables():

if __name__ == '__main__':
    # Insert every row of the dataframe: len(df) is its row count.
    conversation_entities_sqlalchemy_orm(len(df_Conversation_Entities))

## F. Select and Filter - Querying the session, with and without joins

https://www.tutorialspoint.com/sqlalchemy/sqlalchemy_orm_working_with_joins.htm

https://docs.sqlalchemy.org/en/14/orm/query.html

### Filter the Conversation

In [None]:
# Fetch every Conversation row whose order is 0 and print three of its fields.
stmt = (
    session.query(Conversation)
    .filter(Conversation.order == 0)
    .all()  # all() returns a list of rows; first() would return a single row
)
for val in stmt:
    print(val.message, val.order, val.sender, sep="\n")

In [None]:
# Query Conversation rows with order == 0, joined against Fan_Page, and print three fields.
# NOTE(review): join() expects a mapped class or a relationship attribute as its target;
# Fan_Page.fan_page_id looks like a plain column attribute, so this call is expected to fail.
# The column linking Conversation to Fan_Page is not visible here; once confirmed, use
# join(Fan_Page, <Conversation column> == Fan_Page.fan_page_id).
stmt_Conversation = session.query(Conversation).join(Fan_Page.fan_page_id).filter(Conversation.order == 0).all() # all() returns every matching row as a list
for val in stmt_Conversation:
    print(val.message)
    print(val.order)
    print(val.sender)

### Filter the Conversation_Information

In [None]:
# Fetch the first Conversation_Information row with conversation_id == 0.
stmt = (
    session.query(Conversation_Information)
    .filter(Conversation_Information.conversation_id == 0)
    .first()
)
# first() returns None when nothing matches, so guard before reading attributes.
if stmt is None:
    print('No Conversation_Information row found with conversation_id == 0')
else:
    print(stmt.conversation_id)
    print(stmt.customer_count)
    print(stmt.sales_count)

Reference: https://stackoverflow.com/questions/51451768/sqlalchemy-querying-with-datetime-columns-to-filter-by-month-day-year

In [None]:
# Fetch every Conversation_Information row whose start_time equals '2019-11-03'.
# NOTE(review): if start_time is a DateTime column, this equality only matches rows at
# exactly that value; to match the whole day, compare func.date(start_time) instead.
stmt = (
    session.query(Conversation_Information)
    .filter(Conversation_Information.start_time == '2019-11-03')
    .all()
)
# all() returns a list, so the attributes must be read from each row, not from the list.
for val in stmt:
    print(val.conversation_id)
    print(val.customer_count)
    print(val.sales_count)

In [None]:
# Fetch up to five Conversation_Information rows with conversation_id == 0, joined to Conversation.
# join() needs a mapped class as target, so the ON clause is spelled out explicitly.
# NOTE(review): the ON clause assumes conversation_id references Conversation.id - confirm.
stmt_Conversation_Info = (
    session.query(Conversation_Information)
    .join(Conversation, Conversation_Information.conversation_id == Conversation.id)
    .filter(Conversation_Information.conversation_id == 0)
    .limit(5)  # limit() must come after filter(); filtering a limited Query raises an error
    .all()
)
for val in stmt_Conversation_Info:
    print(val.conversation_id)
    print(val.customer_count)
    print(val.sales_count)

### Filter the Customer

In [None]:
# Fetch the first Customer named 'Simon'.
stmt = session.query(Customer).filter(Customer.cus_name == 'Simon').first()
# first() returns None when nothing matches, so guard before reading attributes.
if stmt is None:
    print("No customer found with cus_name == 'Simon'")
else:
    print(stmt.cus_id)
    print(stmt.cus_name)
    print(stmt.id)

In [None]:
def filter_Customer_name(name):
    """Print cus_id, cus_name and id of every Customer whose cus_name equals *name*."""
    # all() returns a list that can be iterated; first() returns a single row or None.
    stmt = session.query(Customer).filter(Customer.cus_name == name).all()
    for val in stmt:
        print(val.cus_id)
        print(val.cus_name)
        print(val.id)


filter_Customer_name('Tòng thị tươi thuý')

In [None]:
# Query the Customer named 'Simon', joined against Conversation, and print three fields.
# NOTE(review): three things to confirm before this cell can run:
#   1. join() expects a mapped class or relationship attribute; Conversation.id looks like
#      a plain column attribute, and the column linking Customer to Conversation is not
#      visible here.
#   2. first() returns a single row or None, so the for-loop below presumably cannot
#      iterate it; use all() to get a list.
#   3. message is read from a Customer row; elsewhere in this notebook message is read
#      from Conversation rows - verify Customer defines it.
stmt_Customer = session.query(Customer).join(Conversation.id).filter(Customer.cus_name == 'Simon').first() # first() returns one row or None
for val in stmt_Customer:
    print(val.cus_id)
    print(val.cus_name)
    print(val.message)

### Filter the Fan_Page

In [None]:
# Fetch every Fan_Page named 'Simon' and print its name and id.
stmt = (
    session.query(Fan_Page)
    .filter(Fan_Page.fan_page_name == 'Simon')
    .all()  # all() returns a list of rows; first() would return a single row
)
for val in stmt:
    print(val.fan_page_name, val.fan_page_id, sep="\n")

In [None]:
def filter_Fan_Page_name(name):
    """Print fan_page_name and fan_page_id of every Fan_Page whose name equals *name*."""
    stmt = session.query(Fan_Page).filter(Fan_Page.fan_page_name == name).all()
    for val in stmt:
        print(val.fan_page_name)
        print(val.fan_page_id)


filter_Fan_Page_name('Hân Beauty')

In [None]:
# Fetch every Fan_Page named 'Simon' and print its name, id and message.
# NOTE(review): message is read from Fan_Page rows here; elsewhere in this notebook it is
# read from Conversation rows - verify Fan_Page defines it, or join Conversation first.
stmt_Fan_Page = session.query(Fan_Page).filter(Fan_Page.fan_page_name == 'Simon').all() # all() returns every matching row as a list
for val in stmt_Fan_Page:
    print(val.fan_page_name)
    print(val.fan_page_id)
    print(val.message)

## G. Select and Filter on data created directly in the MySQL relational database

### Filter by query Conversation

In [None]:
# Fetch every Conversation row whose order is 0 and print three of its fields.
stmt = (
    session.query(Conversation)
    .filter(Conversation.order == 0)
    .all()  # all() returns a list of rows; first() would return a single row
)
for val in stmt:
    print(val.message, val.order, val.sender, sep="\n")

### Filter by query Conversation_Information

In [None]:
# Fetch the first Conversation_Information row with conversation_id == 0.
stmt = (
    session.query(Conversation_Information)
    .filter(Conversation_Information.conversation_id == 0)
    .first()
)
# first() returns None when nothing matches, so guard before reading attributes.
if stmt is None:
    print('No Conversation_Information row found with conversation_id == 0')
else:
    print(stmt.conversation_id)
    print(stmt.customer_count)
    print(stmt.sales_count)

### Filter by query Customer

In [None]:
# Fetch the first Customer named 'Frank'.
stmt = session.query(Customer).filter(Customer.cus_name == 'Frank').first()
# first() returns None when nothing matches, so guard before reading attributes.
if stmt is None:
    print("No customer found with cus_name == 'Frank'")
else:
    print(stmt.cus_id)
    print(stmt.cus_name)
    print(stmt.id)

In [None]:
# Print the name of every Customer whose cus_id is even.
stmt = (
    session.query(Customer)
    .filter(Customer.cus_id % 2 == 0)
    .all()
)
for val in stmt:
    print(val.cus_name)
### Filter by query Fan_Page

In [None]:
# Fetch every Fan_Page named 'ABC' and print its name and id.
stmt = (
    session.query(Fan_Page)
    .filter(Fan_Page.fan_page_name == 'ABC')
    .all()  # all() returns a list of rows; first() would return a single row
)
for val in stmt:
    print(val.fan_page_name, val.fan_page_id, sep="\n")