# 509 Final Project

## Rsolve dependencies

In [1]:
! pip install newsapi-python



## Globally import libraries

In [132]:
import numpy as np
import pandas as pd
import pymysql as mysql
import matplotlib.pyplot as plt
import os
import shutil
import re
import logging
import time
import zipfile
import requests
from bs4 import BeautifulSoup
import datetime
import re
import regex as rex
from collections import defaultdict, Counter
import random
import requests
from bs4 import BeautifulSoup

import sqlite3
import nltk
from string import punctuation
from nltk.corpus import stopwords
import re
import emoji
from nltk.metrics import ConfusionMatrix
#import mysql.connector

# Set pandas global options
pd.options.display.max_rows = 17

Data Preprocessing Functions:

In [98]:
punctuation = set(punctuation) # speeds up comparison
tw_punct = punctuation - {"#"}

# Stopwords - added the 'nan' to this to remove nulls:
# next step could be to add pronouns like she/her, he/him, etc.

sw = stopwords.words("english")
sw = sw + ['nan']

# Two useful regex
whitespace_pattern = re.compile(r"\s+")
hashtag_pattern = re.compile(r"^#[0-9a-zA-Z]+")

def remove_stop(tokens) :
    # modify this function to remove stopwords

    return[t for t in tokens if t not in sw]
 
def remove_punctuation(text, punct_set=tw_punct) : 
    return("".join([ch for ch in text if ch not in punct_set]))

def tokenize(text) : 
    """ Splitting on whitespace rather than the book's tokenize function. That 
        function will drop tokens like '#hashtag' or '2A', which we need for Twitter. """
    
    return([item.lower() for item in whitespace_pattern.split(text)])
    

# two pipelines to either tokenize or simply remove punctuation
# and lowercase as we will need to extract feature words:

full_pipeline = [str.lower, remove_punctuation, tokenize, remove_stop]
first_pipeline = [str.lower, remove_punctuation]


def prepare(text, pipeline) : 
    tokens = str(text)
    
    for transform in pipeline : 
        tokens = transform(tokens)
        
    return(tokens)

In [99]:
def conv_features(text,fw) :
     feature_set=dict()
     for word in text.split():
          if word in fw:
               feature_set[word]=True
     return(feature_set)

In [2]:
today = datetime.date.today()
print(today)
print(type(today))

2023-06-03
<class 'datetime.date'>


## Connect to NewsAPI client

In [3]:
from newsapi import NewsApiClient

api_key = os.environ['NewsAPIKey']

# Init
newsapi = NewsApiClient(api_key=api_key)

## Pull article info from API

# /v2/top-headlines/sources
sources = newsapi.get_sources()
print(sources)

In [6]:
def news_api_urls(q=None,
                  s=None,
                  d_from='2023-05-01',
                  d_to='2023-05-31',
                  api_lst=[]):
    all_articles = newsapi.get_everything(q=q,
                                          sources=s,
                                          from_param=d_from,
                                          to=d_to,
                                          language='en',
                                          sort_by='relevancy',
                                          page=1)

    print(type(all_articles))
    print(all_articles)
    #print('Article list: ', all_articles['articles'])
    for article in all_articles['articles']:
        print('Source ID:', article['source']['id'])
        print('Source name:', article['source']['name'])
        print('Author:', article['author'])
        print('Title:', article['title'])
        print('URL:', article['url'])
        print('Publish date:', article['publishedAt'])
        print('Article text:', article['content'], '\n')

    # Create a list of tuples from the dictionary data
    source_data01 = [(a['source']['name'],
                      a['author'],
                      a['title'],
                      a['url'],
                      a['publishedAt'],
                      a['content'])
                     for a in all_articles['articles']]

    api_lst.extend(source_data01)
    #print(api_lst)
    print(len(api_lst))

## Connect to API to access URLs

### Set API filter parameters

In [82]:
# Citation comes from Allsides Media Bias Chart
#source_lst = ['cnn', 'reuters', 'fox-news']
#source_lst = ['cnn', 'newsweek', 'fox-news']
#source_lst = ['newsweek']
#source_lst = ['axios']
#source_lst = ['cnn']
source_lst = ['cnn','fox-news', 'msnbc']


#date_lst = ['2023-06-01', '2023-05-31']
#date_lst = ['2023-05-30', '2023-05-29']
#date_lst = ['2023-05-30']
date_lst = ['2023-05-30']

# Comes from academci text (fill in citation from DF)
#q_word_lst = ['gender OR male OR female OR transgender', 'security AND (social OR national)',
#              'justice OR surveillance', 'healthcare OR "health care"',
#              '(political AND (bias OR party)) OR republican OR democrat OR election', '(policy AND (drug OR "affirmative action")) OR regulate OR regulation']

q_word_lst = ['justice OR surveillance', 'healthcare OR "health care"',
              '(political AND (bias OR party)) OR republican OR democrat OR election',
              'security AND (social OR national)']

### Access API

In [83]:
api_record_lst01 = []
for s in source_lst:
    print(f'Source: {s}')
    for d in date_lst:
        print(f'Date: {d}')
        for q in q_word_lst:
            print(f'Query word: {q}')
            time.sleep(5 + 11 * random.random())
            news_api_urls(q=q,
                          s=s,
                          d_from=d,
                          d_to=d,
                          api_lst=api_record_lst01)
            print(s, d, q)
    time.sleep(10 + 13 * random.random())

print(api_record_lst01)
print(len(api_record_lst01))

Source: cnn
Date: 2023-05-30
Query word: justice OR surveillance
<class 'dict'>
{'status': 'ok', 'totalResults': 10, 'articles': [{'source': {'id': 'cnn', 'name': 'CNN'}, 'author': 'Hannah Rabinowitz', 'title': 'US expatriate arrested after allegedly threatening US senators and Marines', 'description': 'An American living abroad was arrested last week on a charge of threatening US senators and political groups over the number of unsolicited political emails he was receiving, the Justice Department announced Tuesday.', 'url': 'https://www.cnn.com/2023/05/30/politics/us-expatriate-arrested-allegedly-threatening-us-senators/index.html', 'urlToImage': 'https://media.cnn.com/api/v1/images/stellar/prod/210209035327-doj-seal.jpg?c=16x9&q=w_800,c_fill', 'publishedAt': '2023-05-30T22:48:22Z', 'content': 'An American living abroad was arrested last week on a charge of threatening US senators and political groups over the number of unsolicited political emails he was receiving, the Justice Depart

Article data saved into dataframe:

In [84]:
api_record_df = pd.DataFrame (list (api_record_lst01),
                                   columns = ['Source','Author','Title','URL','date','content'])

Fox and Newsweek article data saved:

In [39]:
#api_record_df_fox_msnbc = pd.DataFrame (list (api_record_lst01),
#                                   columns = ['Source','Author','Title','URL','date','content'])

CNN only:

In [93]:
#import requests
#from bs4 import BeautifulSoup
#article=[]

#for index, rows in api_record_df.iterrows():
#    article=[]
#    url = api_record_df.at[index,'URL']
#    response = requests.get(url)
#    time.sleep(5 + 10*random.random())

#    soup = BeautifulSoup(response.text, 'html.parser')
#    article_body = soup.find('div', class_='article__content-container')
    
#    if article_body is not None:
#        article_text = article_body.get_text()
#        api_record_df.at[index,'full_text']=article_text


#    #article_body = soup.find('div', {'class': 'article__body-content'})
#    #article_body = soup.find('div', class_='paragraph inline-placeholder')
#    #article_text = soup.find('div', {'class': 'zn-body__paragraph'})




FOX, CNN and MSNBC:

In [87]:
article=[]

for index, rows in api_record_df.iterrows():
    article=[]
    url = api_record_df.at[index,'URL']
    response = requests.get(url)
    time.sleep(5 + 10*random.random())

    soup = BeautifulSoup(response.text, 'html.parser')
    article_body = soup.find('div', class_='article__content-container')
    
    if article_body is None:
        article_body = soup.find('p', class_="speakable")
        if article_body is None:
            article_body = soup.find('div', class_='')
            if article_body is None:
                article_body = soup.find(re.compile('^articleBody*'))
            #else:
                #break
                                         

    if article_body is not None:
        article_text = article_body.get_text()
        api_record_df.at[index,'full_text']=article_text



Fox, CNN, Newsweek:

In [57]:
#article=[]

#for index, rows in api_record_df_fox_newsweek.iterrows():
#    article=[]
#    url = api_record_df_fox_newsweek.at[index,'URL']
#    response = requests.get(url)
#    time.sleep(5 + 10*random.random())

#    soup = BeautifulSoup(response.text, 'html.parser')
#    #text = soup.get_text()
#    #article_body = re.findall(r'^article__content-container.$', text)
#    # Newsweek case:
#    #article_body = soup.find(re.compile('^articleBody*$alternativeHeadline')) #$alternativeHeadline
#    article_body = re.findall(r'^articleBody*$alternativeHeadline', soup.get_text())

#    if article_body is not None:
#        article_text = article_body#.get_text()
#        api_record_df_fox_newsweek.at[index,'full_text']=article_text
#    else: # FOX case
#        #article_body = soup.find(re.compile('^articleBody*$CLICK HERE TO GET THE FOX NEWS APP'))
#        article_body = re.findall(r'^articleBody*$CLICK HERE TO GET THE FOX NEWS APP', soup.get_text())
#        if article_body is not None:
#            article_text = article_body#.get_text()
#            api_record_df_fox_newsweek.at[index,'full_text']=article_text
#        else:
#            break


In [51]:
soup.get_text()

'\n\n\nDemocrat leader: \'un-American\' \'MAGA Republicans\' trying to \'crash\' economy for 2024 political benefit | Fox News\n\n\nFox News Media Fox News MediaFox BusinessFox NationFox News AudioFox WeatherOutkickBooks Fox News   U.S.PoliticsWorldOpinionMediaEntertainmentSportsLifestyleVideoAI More Expand / Collapse search Login Watch TV Menu     U.S. CrimeMilitaryEducationTerrorImmigrationEconomyPersonal FreedomsFox News InvestigatesWorld U.N.ConflictsTerrorismDisastersGlobal EconomyEnvironmentReligionScandalsOpinion Politics ExecutiveSenateHouseJudiciaryForeign PolicyPollsElectionsEntertainment Celebrity NewsMoviesTV NewsMusic NewsStyle NewsEntertainment VideoBusiness Personal FinanceEconomyMarketsWatchlistLifestyleReal EstateTechLifestyle Food + DrinkCars + TrucksTravel + OutdoorsHouse + HomeFitness + Well-beingStyle + BeautyFamilyFaithScience ArchaeologyAir & SpacePlanet EarthWild NatureNatural ScienceDinosaursTech SecurityInnovationDronesComputersVideo GamesMilitary TechHealth C

In [95]:
api_record_df['Source'].value_counts()

Fox News    84
CNN         37
MSNBC       16
Name: Source, dtype: int64

In [88]:
api_record_df.isnull().sum()

Source       0
Author       0
Title        0
URL          0
date         0
content      0
full_text    0
dtype: int64

In [93]:
api_record_df.to_csv("News_API_FOX_CNN_MSNBC.csv", sep=',')

In [89]:
#api_record_df_fox_msnbc.isna().sum()

In [92]:
api_record_df.tail(100)

Unnamed: 0,Source,Author,Title,URL,date,content,full_text
37,Fox News,Maria Lencki,Teen mob that attacked US Marines on Memorial ...,https://www.foxnews.com/media/teen-mob-attacke...,2023-05-30T22:59:26Z,"The mob of San Clemente, California teenagers ...","The mob of San Clemente, California teenagers ..."
38,Fox News,Fox News Staff,JESSE WATTERS: Wall Street liberals force comp...,https://www.foxnews.com/media/jesse-watters-wa...,2023-05-30T23:54:41Z,Fox News host Jesse Watters explains why corpo...,Fox News host Jesse Watters explains why corpo...
39,Fox News,Jon Brown,South Carolina teen killed over bottle of wate...,https://www.foxnews.com/us/south-carolina-teen...,2023-05-30T15:58:20Z,A South Carolina convenience store owner has b...,A South Carolina Shell gas station owner has b...
40,Fox News,"Bret Baier, Amy Munneke",'Special Report' Spotlight: Private companies ...,https://www.foxnews.com/media/special-report-s...,2023-05-30T23:01:07Z,Adversaries like Russia and China are increasi...,Adversaries like Russia and China are increasi...
41,Fox News,Associated Press,Underage ISIS terrorist planned bomb attack in...,https://www.foxnews.com/world/underage-isis-te...,2023-05-30T16:34:03Z,Italian authorities have arrested a minor susp...,Italian authorities have arrested a minor susp...
...,...,...,...,...,...,...,...
132,MSNBC,Steve Benen,Indiana doctor is fined and reprimanded over 1...,https://www.msnbc.com/rachel-maddow-show/maddo...,2023-05-30T17:18:15Z,Dr. Caitlin Bernard might not have a household...,IE 11 is not supported. For an optimal experie...
133,MSNBC,Steve Benen,"Striking a debt ceiling deal is one thing, pas...",https://www.msnbc.com/rachel-maddow-show/maddo...,2023-05-30T12:00:30Z,Was it easy? No. Was it part of a dangerous ho...,IE 11 is not supported. For an optimal experie...
134,MSNBC,Hayes Brown,Biden and Cruz both say they hate this anti-LG...,https://www.msnbc.com/opinion/msnbc-opinion/te...,2023-05-30T22:48:05Z,"Sen. Ted Cruz, R-Texas, is wrong about many th...",IE 11 is not supported. For an optimal experie...
135,MSNBC,Steve Benen,Tuberville’s comments on inner-city teachers a...,https://www.msnbc.com/rachel-maddow-show/maddo...,2023-05-30T15:43:18Z,Its not uncommon for Republican officials to t...,IE 11 is not supported. For an optimal experie...


In [10]:
#api_record_lst01 = [(1, 1, 1), (2, 2, 2), (3, 3, 3), (1, 1, 1), (4, 4, 4), (3, 3, 3)]
#print(api_record_lst01)
api_record_set01 = set(api_record_lst01)
print(api_record_set01)
api_record_lst02 = list(api_record_set01)
print(api_record_lst02)
print(len(api_record_lst02))

109


Create feature word set based on common words in dataset:

In [100]:
api_record_df['tokens']= api_record_df['full_text'].apply(prepare,
                                                     pipeline=full_pipeline)

In [None]:
api_record_df['cleaner_text']= api_record_df['full_text'].apply(prepare,
                                                     pipeline=first_pipeline)

In [143]:
api_record_df['Political_Lean'] = np.where((api_record_df['Source'] == "CNN") | (api_record_df['Source'] == "MSNBC"), 'Left', 'Right')

In [146]:
api_record_df.head(100)

Unnamed: 0,Source,Author,Title,URL,date,content,full_text,tokens,cleaner_text,Political_Lean
0,CNN,Hannah Rabinowitz,US expatriate arrested after allegedly threate...,https://www.cnn.com/2023/05/30/politics/us-exp...,2023-05-30T22:48:22Z,An American living abroad was arrested last we...,\n\n\n\n\nCNN\n — \n \n\n\n An...,"[, cnn, —, american, living, abroad, arrested,...",\n\n\n\n\ncnn\n — \n \n\n\n an...,Left
1,CNN,"Nick Valencia,Christina Maxouris,Devon M. Sayers",The 11-year-old boy shot in the chest by Missi...,https://www.cnn.com/2023/05/30/us/mississippi-...,2023-05-30T21:15:44Z,The 11-year-old boy who was shot in the chest ...,\n\n\n\n\nCNN\n — \n \n\n\n Th...,"[, cnn, —, 11yearold, boy, shot, chest, missis...",\n\n\n\n\ncnn\n — \n \n\n\n th...,Left
2,CNN,Eric Levenson,Opening statements set for today in Pittsburgh...,https://www.cnn.com/2023/05/30/us/pittsburgh-t...,2023-05-30T12:51:47Z,Opening statements are set for Tuesday in the ...,\n\n\n\n\nCNN\n — \n \n\n\n Th...,"[, cnn, —, federal, death, penalty, trial, man...",\n\n\n\n\ncnn\n — \n \n\n\n th...,Left
3,CNN,Nicki Brown,Colleges will soon have option to hide student...,https://www.cnn.com/2023/05/30/us/college-comm...,2023-05-30T17:37:28Z,Colleges will soon have the option to hide stu...,\n\n\n\n\nCNN\n — \n \n\n\n Co...,"[, cnn, —, colleges, soon, option, hide, stude...",\n\n\n\n\ncnn\n — \n \n\n\n co...,Left
4,CNN,Rob Frehse,Man arrested and charged with first-degree mur...,https://www.cnn.com/2023/05/30/us/new-jersey-c...,2023-05-30T22:34:22Z,A man in Virginia has been arrested and charge...,\n\n\n\n\nCNN\n — \n \n\n\n A ...,"[, cnn, —, man, virginia, arrested, charged, f...",\n\n\n\n\ncnn\n — \n \n\n\n a ...,Left
...,...,...,...,...,...,...,...,...,...,...
95,Fox News,Newt Gingrich,Corrupt media protects corrupt establishment a...,https://www.foxnews.com/opinion/corrupt-media-...,2023-05-30T10:00:05Z,The conspiracy between a corrupt set of bureau...,The conspiracy between a corrupt set of bureau...,"[conspiracy, corrupt, set, bureaucracies, incl...",the conspiracy between a corrupt set of bureau...,Right
96,Fox News,Chris Pandolfo,Texas Gov Greg Abbott to sign bill taking on '...,https://www.foxnews.com/politics/texas-gov-gre...,2023-05-30T15:46:36Z,"Texas is about to crack down on ""rogue"" distri...","Texas is about to crack down on ""rogue"" distri...","[texas, crack, rogue, district, attorneys, ref...",texas is about to crack down on rogue district...,Right
97,Fox News,Associated Press,Most US adults believe race should play a role...,https://www.foxnews.com/us/most-us-adults-beli...,2023-05-30T11:06:59Z,As the Supreme Court decides the fate of affir...,As the Supreme Court decides the fate of affir...,"[supreme, court, decides, fate, affirmative, a...",as the supreme court decides the fate of affir...,Right
98,Fox News,Elizabeth Elkind,"Manchin praises debt ceiling deal, predicts pa...",https://www.foxnews.com/politics/manchin-prais...,2023-05-30T17:26:30Z,"Sen. Joe Manchin, D-W.Va., said Tuesday he ""ab...","Sen. Joe Manchin, D-W.Va., said Tuesday he ""ab...","[sen, joe, manchin, dwva, said, tuesday, absol...",sen joe manchin dwva said tuesday he absolutel...,Right


In [102]:
# Obtain total counts of words in the entire corpus:

word_dist=Counter([item for sublist in api_record_df['tokens'] 
                   for item in sublist])

Preparing data for classifier ingestion (as dictionary)

In [147]:
news_data=dict()
#news_data=[(conv_features(text,feature_words2), 
#                party) for (text, party) in convention_data]
#for index, row in api_record_df.iterrows():
    
news_data=[(api_record_df.at[index,'cleaner_text'],
            api_record_df.at[index,'Political_Lean']) for (index,row) in api_record_df.iterrows()]

Determine featureword set:

In [125]:
word_cutoff=5
feature_words2=[]
for word, count in word_dist.items() :
    #for convention_data_df['word_dist']
    if count > word_cutoff :
        feature_words2.append(word)
            
print(f"With a word cutoff of {word_cutoff}, we have {len(feature_words2)} as features in the model.")

With a word cutoff of 5, we have 1505 as features in the model.


In [107]:
feature_words2

['',
 'cnn',
 '—',
 'american',
 'living',
 'arrested',
 'last',
 'week',
 'threatening',
 'us',
 'senators',
 'political',
 'groups',
 'number',
 'emails',
 'receiving',
 'justice',
 'department',
 'announced',
 'tuesday',
 'welton',
 'lived',
 'thailand',
 'decade',
 'allegedly',
 'made',
 'multiple',
 'calls',
 'overseas',
 '2021',
 'north',
 'carolina',
 'republican',
 'sen',
 'according',
 'court',
 'documents',
 'thursday',
 'international',
 'returned',
 'united',
 'states',
 'facing',
 'one',
 'federal',
 'official',
 'faces',
 '10',
 'years',
 'prison',
 'convicted',
 'lawyer',
 'yet',
 'public',
 'reached',
 'comment',
 'kill',
 'people',
 'saying',
 'would',
 'back',
 'said',
 'getting',
 'telling',
 'find',
 'sent',
 'cut',
 'also',
 'white',
 'house',
 'interview',
 'father',
 'claimed',
 'received',
 'agree',
 'marines',
 'working',
 'consulate',
 'country',
 'denied',
 'wife',
 'i’m',
 'going',
 'bunch',
 'that’s',
 'fking',
 'tell',
 'me”',
 'november',
 'want',
 'live'

In [127]:
# remove cnn and fox-news from the key featurewords:

feature_words2.remove("cnn")
feature_words2.remove("fox")


In [148]:
featuresets=dict()
featuresets = [(conv_features(text,feature_words2), 
                lean) for (text, lean) in news_data]

In [129]:
featuresets

[({'—': True,
   'american': True,
   'living': True,
   'arrested': True,
   'last': True,
   'week': True,
   'threatening': True,
   'us': True,
   'senators': True,
   'political': True,
   'groups': True,
   'number': True,
   'emails': True,
   'receiving': True,
   'justice': True,
   'department': True,
   'announced': True,
   'tuesday': True,
   'welton': True,
   'lived': True,
   'thailand': True,
   'decade': True,
   'allegedly': True,
   'made': True,
   'multiple': True,
   'calls': True,
   'overseas': True,
   '2021': True,
   'north': True,
   'carolina': True,
   'republican': True,
   'sen': True,
   'according': True,
   'court': True,
   'documents': True,
   'thursday': True,
   'international': True,
   'returned': True,
   'united': True,
   'states': True,
   'facing': True,
   'one': True,
   'federal': True,
   'official': True,
   'faces': True,
   '10': True,
   'years': True,
   'prison': True,
   'convicted': True,
   'lawyer': True,
   'yet': True,
   

In [149]:
random.seed(20220507)
random.shuffle(featuresets)
test_size = 20


In [150]:
test_set=dict()
train_set=dict()
test_set, train_set = featuresets[:test_size], featuresets[test_size:]

In [151]:
classifier = nltk.NaiveBayesClassifier.train(train_set)
print(nltk.classify.accuracy(classifier, test_set))

0.5


In [152]:
# Confusion Matrix:

predicted_labels = [classifier.classify(features) for features, 
                    label in test_set]
gold_labels = [label for features, label in test_set]

cm = ConfusionMatrix(gold_labels, predicted_labels)
print(cm.pretty_format(sort_by_count=True, show_percents=True))

      |             R |
      |      L      i |
      |      e      g |
      |      f      h |
      |      t      t |
------+---------------+
 Left | <50.0%>     . |
Right |  50.0%     <.>|
------+---------------+
(row = reference; col = test)



## Initiate MySQL connection

In [14]:
'''Set local environment variables to hide user name & password citation:
https://www.geeksforgeeks.org/how-to-hide-sensitive-credentials-using-python/'''

user_name = "sqluser"
user_pass = "password"

#user_name = os.environ['MySQLUSRAC']
#user_pass = os.environ['MySQLPWDAC']

# Instantiate connection
db_conn = mysql.connect(host='localhost',
                        port=int(3306),
                        user=user_name,
                        passwd=user_pass,
                        db='509_final_proj')

# Create a cursor object
cursor = db_conn.cursor()

In [15]:
tbl_names = pd.read_sql('SHOW TABLES', db_conn)

display(tbl_names)
print(type(tbl_names))



Unnamed: 0,Tables_in_509_final_proj


<class 'pandas.core.frame.DataFrame'>


### Establish logging policy

In [20]:
'''Logging citations (see additional code in following code blocks:
OpenAI. (2021). ChatGPT [Computer software]. https://openai.com/;
https://docs.python.org/3/howto/logging.html#logging-basic-example;
https://docs.python.org/3/howto/logging.html#logging-to-a-file;
https://docs.python.org/3/howto/logging-cookbook.html#using-a-rotating-log-file-handler;
https://docs.python.org/3/howto/logging-cookbook.html#using-a-timed-rotating-file-handler'''

# Set up logging
logging.basicConfig(level=logging.INFO,
                    filename='pymysql.log',
                    filemode='a',
                    format='>>>>>>>>>>>>>><<<<<<<<<<<<<<\n%(asctime)s - %(levelname)s - %(message)s')

### Update individual tables

#### Update `news_articles` table from API

In [21]:
nat_tbl_name = 'nar_temp'
nwa_tbl_name = 'news_articles'

In [22]:
'''Using cursor and loading into temp file:
OpenAI. (2021). ChatGPT [Computer software]. https://openai.com/;
https://pynative.com/python-mysql-insert-data-into-database-table/'''

# Execute query and measure execution time
start_time = time.time()

# Wipe temp table
try:
    nat_dlt_tble_stmnt = f"""DELETE FROM {nat_tbl_name}"""
    cursor.execute(nat_dlt_tble_stmnt)
    logging.info(f'Successfully executed query:\n{nat_dlt_tble_stmnt}\n\nRecords scanned: {cursor.rowcount}')
except mysql.Error as e:
    logging.error(f'Error executing query:\n{nat_dlt_tble_stmnt}\n\n{e}')
finally:
    end_time = time.time()
    logging.info(f'Time taken: {end_time - start_time:.3f} seconds\n>>>>>>>>>>>>>><<<<<<<<<<<<<<\n\n')

# Execute query and measure execution time
start_time = time.time()

# Load data from CSV file into a temporary table
try:
    nat_csv_load_stmnt = f"""
    INSERT INTO {nat_tbl_name}
    (
    source_name,
    author,
    title,
    url,
    publish_date,
    content
    )
    VALUES (%s, %s, %s, %s, %s, %s)
    """

    # Execute the query with multiple values
    cursor.executemany(nat_csv_load_stmnt, api_record_lst02)
    #cursor.execute(nat_csv_load_stmnt)
    logging.info(f'Successfully executed query:\n{nat_csv_load_stmnt}\n\nRecords scanned: {cursor.rowcount}')
except mysql.Error as e:
    logging.error(f'Error executing query:\n{nat_csv_load_stmnt}\n\n{e}')
finally:
    end_time = time.time()
    logging.info(f'Time taken: {end_time - start_time:.3f} seconds\n>>>>>>>>>>>>>><<<<<<<<<<<<<<\n\n')

# Execute query and measure execution time
start_time = time.time()

# Insert new records into main table
try:
    nwa_load_stmnt = f"""
    INSERT INTO {nwa_tbl_name}
    (
    source_name,
    author,
    title,
    url,
    publish_date,
    content
    )
    SELECT
        tp.source_name,
        tp.author,
        tp.title,
        tp.url,
        tp.publish_date,
        tp.content
    FROM {nat_tbl_name} AS tp
    LEFT JOIN {nwa_tbl_name} AS mn
        ON tp.title = mn.title
            AND CAST(LEFT(tp.publish_date, 10) AS DATE) = CAST(LEFT(mn.publish_date, 10) AS DATE)
            AND tp.author = mn.author
    """
    cursor.execute(nwa_load_stmnt)
    logging.info(f'Successfully executed query:\n{nwa_load_stmnt}\n\nRecords scanned: {cursor.rowcount}')
except mysql.Error as e:
    logging.error(f'Error executing query:\n{nwa_load_stmnt}\n\n{e}')
finally:
    end_time = time.time()
    logging.info(f'Time taken: {end_time - start_time:.3f} seconds\n>>>>>>>>>>>>>><<<<<<<<<<<<<<\n\n')

# Execute query and measure execution time
start_time = time.time()

# Wipe temp table
try:
    cursor.execute(nat_dlt_tble_stmnt)
    logging.info(f'Successfully executed query:\n{nat_dlt_tble_stmnt}\n\nRecords scanned: {cursor.rowcount}')
except mysql.Error as e:
    logging.error(f'Error executing query:\n{nat_dlt_tble_stmnt}\n\n{e}')
finally:
    end_time = time.time()
    logging.info(f'Time taken: {end_time - start_time:.3f} seconds\n>>>>>>>>>>>>>><<<<<<<<<<<<<<\n\n')

### Commit changes and close cursor and connection instances

In [23]:
# Commit the changes to the database
db_conn.commit()

# Close the cursor and database connection
cursor.close()
db_conn.close()