In [4]:
import datetime

# Stamp this run so a reader can tell how fresh the outputs below are.
# datetime.today() yields the same naive local-time value as datetime.now().
now = datetime.datetime.today()

# Show the timestamp
print("Last run:", now)

Last run: 2025-07-04 15:57:18.161871


In [5]:
import os
from urllib.parse import quote

import pandas as pd
import psycopg2
# from dotenv import load_dotenv
from google.colab import userdata
from sqlalchemy import create_engine, text

# Credentials are read from Colab's Secrets panel via google.colab.userdata.
# (For a local, non-Colab run, load the same keys from a .env file with
# python-dotenv / os.getenv instead.)
DEFAULT_DB_PORT = '5432'  # standard PostgreSQL port, used when DB_PORT is not set


def _read_secret(name, default=None):
    """Fetch one Colab secret, falling back to `default`.

    Args:
        name: Key of the secret in the Colab Secrets panel.
        default: Value returned when the secret is absent, empty, or this
            notebook has not been granted access to it.

    Returns:
        The secret's value, or `default`.
    """
    try:
        value = userdata.get(name)
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        # userdata.get raises rather than returning None for a missing or
        # unshared secret; convert that into the fallback so the check below
        # can report every missing key at once.
        return default
    return value or default


db_host = _read_secret('DB_HOST')
db_port = _read_secret('DB_PORT', DEFAULT_DB_PORT)
db_name = _read_secret('DB_NAME')
db_user = _read_secret('DB_USER')
db_pass = _read_secret('DB_PASS')

db_connection_str = None  # stays None unless all credentials are present
engine = None  # downstream cells test `if engine:` before querying

missing_keys = [
    key
    for key, value in (
        ('DB_HOST', db_host),
        ('DB_NAME', db_name),
        ('DB_USER', db_user),
        ('DB_PASS', db_pass),
    )
    if not value
]

if missing_keys:
    print("ERROR: Database credentials not fully loaded from Colab secrets.")
    print(f"Missing: {', '.join(missing_keys)}. Add them in the Colab Secrets panel and grant this notebook access.")
else:
    print("Database credentials loaded successfully.")
    # Percent-encode user and password: characters such as '@', '/', ':' or '%'
    # would otherwise be parsed as URL structure and break the connection string.
    db_connection_str = (
        f"postgresql+psycopg2://{quote(db_user, safe='')}:{quote(db_pass, safe='')}"
        f"@{db_host}:{db_port}/{db_name}"
    )
    try:
        engine = create_engine(db_connection_str)

        # Smoke-test the engine; the context manager returns the connection
        # to the pool when the block exits.
        with engine.connect() as connection:
            # Raw SQL strings must be wrapped in text() for Connection.execute
            result = connection.execute(text("SELECT version();"))
            version_row = result.fetchone()
            if version_row:
                print(f"\nConnection to PostgreSQL successful! Version: {version_row[0]}")
            else:
                print("\nConnection to PostgreSQL successful, but version query returned no result.")

    except Exception as e:
        print(f"\nFailed to create SQLAlchemy engine or connect: {e}")
        if engine is not None:
            # Release any pooled connections held by the half-initialised engine.
            engine.dispose()
        engine = None  # signal to later cells that no usable engine exists

Database credentials loaded successfully.

Connection to PostgreSQL successful! Version: PostgreSQL 16.8 (Debian 16.8-1.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit


In [6]:
# Word leaderboard by vote count. The INNER JOIN means only words with at
# least one row in `votes` appear in the result.
if not engine:
    print("Database engine not available. Please run the connection cell first.")
else:
    sql_query = """
    SELECT
        W.word_id,
        W.word,
        COUNT(V.vote_id) AS total_votes,
        COUNT(DISTINCT V.user_id) AS unique_voters
    FROM
        words W
    INNER JOIN
        votes V ON W.word_id = V.word_id
    GROUP BY
        W.word_id, W.word  -- Group by ID and text
    ORDER BY
        total_votes DESC;
    """
    try:
        # Echo the heading and the exact SQL before running it.
        print(
            "--- Query: All words, ordered by total votes, with the number of unique voters ---",
            sql_query,
            sep="\n",
        )
        df_query = pd.read_sql_query(sql=sql_query, con=engine)
        display(df_query)
    except Exception as exc:
        print(f"Error executing query: {exc}")

--- Query: All words, ordered by total votes, with the number of unique voters ---

    SELECT
        W.word_id,                      
        W.word,
        COUNT(V.vote_id) AS total_votes,
        COUNT(DISTINCT V.user_id) AS unique_voters
    FROM
        words W
    INNER JOIN
        votes V ON W.word_id = V.word_id
    GROUP BY
        W.word_id, W.word  -- Group by ID and text
    ORDER BY
        total_votes DESC; 
    


Unnamed: 0,word_id,word,total_votes,unique_voters
0,4,ethical,155,11
1,11,corrupt,138,9
2,78,hateful,131,10
3,10,evil,105,9
4,152,lackey,93,5
...,...,...,...,...
139,189,responsible,1,1
140,218,wishywashy,1,1
141,187,conservative,1,1
142,182,passionate,1,1


In [7]:
# Vote totals per politician. Because of the INNER JOIN, politicians without
# any row in `votes` are not listed.
if not engine:
    print("Database engine not available. Please run the connection cell first.")
else:
    sql_query = """
    SELECT
        P.name,
        P.politician_id,
        COUNT(V.vote_id) AS votes
    FROM
        politicians P
    INNER JOIN
        votes V ON P.politician_id = V.politician_id
    GROUP BY
        P.politician_id, P.name
    ORDER BY
        votes DESC;
    """
    try:
        # Echo the heading and the exact SQL before running it.
        print(
            "--- Query: All politicians, with their IDs, and the number of submissions for each ---",
            sql_query,
            sep="\n",
        )
        df_query = pd.read_sql_query(sql=sql_query, con=engine)
        display(df_query)
    except Exception as exc:
        print(f"Error executing query: {exc}")

--- Query: All politicians, with their IDs, and the number of submissions for each ---

    SELECT
        P.name,
        P.politician_id,
        COUNT(V.vote_id) AS votes
    FROM
        politicians P
    INNER JOIN
        votes V ON P.politician_id = V.politician_id
    GROUP BY
        P.politician_id, P.name
    ORDER BY
        votes DESC;
    


Unnamed: 0,name,politician_id,votes
0,Donald Trump,1,960
1,Bernie Sanders,2,249
2,Pete Hegseth,600,153
3,Mitch McConnell,599,139
4,JD Vance,591,122
5,Cory Booker,3,111
6,Kristi Noem,624,88
7,Alexandria Ocasio-Cortez,36,82
8,Ted Cruz,5,69
9,Marjorie Taylor Greene,630,69


In [8]:
# Words voted for a single politician, most-voted first.
if not engine:
    print("Database engine not available. Please run the connection cell first.")
else:
    # Politician to report on; bound to the %(pol_id)s placeholder in the SQL.
    target_politician_id = 1

    sql_query = """
    SELECT
        W.word,
        W.word_id,
        COUNT(V.vote_id) AS votes
    FROM
        words W
    INNER JOIN
        votes V ON W.word_id = V.word_id
    WHERE
        V.politician_id = %(pol_id)s
    GROUP BY
        W.word_id, W.word
    ORDER BY
        votes DESC;
    """
    try:
        # Echo the heading and the exact SQL before running it.
        print(
            f"--- Query: Words with their IDs submitted for Politician ID = {target_politician_id} ---",
            sql_query,
            sep="\n",
        )
        # The id travels as a bound parameter rather than being formatted into the SQL text.
        df_query = pd.read_sql_query(
            sql=sql_query,
            con=engine,
            params={'pol_id': target_politician_id},
        )
        display(df_query)
    except Exception as exc:
        print(f"Error executing query: {exc}")

--- Query: Words with their IDs submitted for Politician ID = 1 ---

    SELECT
        W.word,
        W.word_id,
        COUNT(V.vote_id) AS votes
    FROM
        words W
    INNER JOIN
        votes V ON W.word_id = V.word_id
    WHERE
        V.politician_id = %(pol_id)s
    GROUP BY
        W.word_id, W.word
    ORDER BY
        votes DESC;
    


Unnamed: 0,word,word_id,votes
0,corrupt,11,85
1,cruel,35,83
2,greedy,110,58
3,liar,146,57
4,traitor,142,39
5,dictator,8,37
6,evil,10,37
7,selfish,14,34
8,seditious,130,34
9,genius,51,34


In [9]:
# Words whose votes all come from one user, with how many votes and how many
# distinct politicians those votes cover.
if not engine:
    print("Database engine not available. Please run the connection cell first.")
else:
    sql_query = """
    SELECT
        W.word,
        W.word_id,
        COUNT(V.vote_id) AS votes,
        COUNT(DISTINCT V.politician_id) AS politicians
    FROM
        words W
    INNER JOIN
        votes V ON W.word_id = V.word_id
    GROUP BY
        W.word_id, W.word
    HAVING
        COUNT(DISTINCT V.user_id) = 1  -- Filter for words with exactly one unique voter
    ORDER BY
        votes DESC;
    """
    try:
        # Echo the heading and the exact SQL before running it.
        print(
            "--- Query: All words with exactly one unique voter ---",
            sql_query,
            sep="\n",
        )
        df_query = pd.read_sql_query(sql=sql_query, con=engine)
        if df_query.empty:
            print("No words found with only one unique voter.")
        else:
            display(df_query)
    except Exception as exc:
        print(f"Error executing query: {exc}")

--- Query: All words with exactly one unique voter ---

    SELECT
        W.word,
        W.word_id,
        COUNT(V.vote_id) AS votes,
        COUNT(DISTINCT V.politician_id) AS politicians
    FROM
        words W
    INNER JOIN
        votes V ON W.word_id = V.word_id
    GROUP BY
        W.word_id, W.word
    HAVING
        COUNT(DISTINCT V.user_id) = 1  -- Filter for words with exactly one unique voter
    ORDER BY
        votes DESC;
    


Unnamed: 0,word,word_id,votes,politicians
0,eloquent,115,18,3
1,loser,213,11,1
2,awesome,141,11,1
3,ass,205,10,1
4,rich,206,9,1
5,reasonable,89,9,2
6,strong,191,8,1
7,loquacious,59,7,1
8,thug,172,6,1
9,heartless,120,6,1


In [10]:
# Words voted on by two or more distinct users, ranked by voter count and
# then by total votes.
if not engine:
    print("Database engine not available. Please run the connection cell first.")
else:
    sql_query = """
    SELECT
        W.word,
        W.word_id,
        COUNT(V.vote_id) AS votes,
        COUNT(DISTINCT V.politician_id) AS politicians,
        COUNT(DISTINCT V.user_id) AS voters
    FROM
        words W
    INNER JOIN
        votes V ON W.word_id = V.word_id
    GROUP BY
        W.word_id, W.word
    HAVING
        COUNT(DISTINCT V.user_id) >= 2  -- Filter for words with at least two unique voters
    ORDER BY
        voters DESC, votes DESC;
    """
    try:
        # Echo the heading and the exact SQL before running it.
        print(
            "--- Query: All words with 2 or more unique voters ---",
            sql_query,
            sep="\n",
        )
        df_query = pd.read_sql_query(sql=sql_query, con=engine)
        if df_query.empty:
            print("No words found with at least two unique voters.")
        else:
            display(df_query)
    except Exception as exc:
        print(f"Error executing query: {exc}")

--- Query: All words with 2 or more unique voters ---

    SELECT
        W.word,
        W.word_id,
        COUNT(V.vote_id) AS votes,
        COUNT(DISTINCT V.politician_id) AS politicians,
        COUNT(DISTINCT V.user_id) AS voters
    FROM
        words W
    INNER JOIN
        votes V ON W.word_id = V.word_id
    GROUP BY
        W.word_id, W.word
    HAVING
        COUNT(DISTINCT V.user_id) >= 2  -- Filter for words with at least two unique voters
    ORDER BY
        voters DESC, votes DESC;
    


Unnamed: 0,word,word_id,votes,politicians,voters
0,ethical,4,155,14,11
1,hateful,78,131,10,10
2,wise,6,56,2,10
3,corrupt,11,138,6,9
4,evil,10,105,6,9
...,...,...,...,...,...
95,articulate,210,2,2,2
96,what,131,2,1,2
97,delusional,148,2,1,2
98,cop,65,2,1,2


In [11]:
# For one politician, find the most-submitted word in each calendar week
# (ties broken alphabetically) together with that week's vote and voter counts.
if not engine:
    print("Database engine not available. Please run the connection cell first.")
else:
    target_politician_id = 1
    # Timestamp column of the `votes` table (alias V) used for weekly bucketing.
    # It is spliced into the SQL as an identifier, so it must be a trusted,
    # hard-coded name; change it here if the schema uses a different column.
    actual_timestamp_column = 'V.created_at'

    sql_query = f"""
    WITH WeeklyWordCounts AS (
        SELECT
            DATE_TRUNC('week', {actual_timestamp_column}) AS week_start,
            W.word_id,
            W.word,
            COUNT(V.vote_id) AS word_submissions_in_week,
            COUNT(DISTINCT V.user_id) AS unique_voters_in_week -- Added this line
        FROM
            votes V
        INNER JOIN
            words W ON V.word_id = W.word_id
        WHERE
            V.politician_id = %(pol_id)s
        GROUP BY
            DATE_TRUNC('week', {actual_timestamp_column}),
            W.word_id,
            W.word
    ),
    RankedWeeklyWords AS (
        SELECT
            week_start,
            word_id,
            word,
            word_submissions_in_week,
            unique_voters_in_week, -- Carry this through
            ROW_NUMBER() OVER (PARTITION BY week_start
                               ORDER BY word_submissions_in_week DESC, word ASC) as rn
        FROM
            WeeklyWordCounts
    )
    SELECT
        week_start,
        word AS most_used_word,
        word_submissions_in_week AS votes,
        unique_voters_in_week AS voters -- Select it in the final output
    FROM
        RankedWeeklyWords
    WHERE
        rn = 1
    ORDER BY
        week_start ASC;
    """
    try:
        print(f"--- Query: Most used word per week for Politician ID = {target_politician_id} ---")

        # The politician id is passed as a bound parameter (%(pol_id)s).
        df_query = pd.read_sql_query(sql=sql_query, con=engine, params={'pol_id': target_politician_id})
        if df_query.empty:
            print(f"No weekly word usage data found for Politician ID {target_politician_id}.")
        else:
            display(df_query)
    except Exception as exc:
        print(f"AN ERROR OCCURRED:\n{exc}")
        # When the exception carries an `orig` attribute (the wrapped driver
        # error), print whatever extra PostgreSQL diagnostics it exposes.
        if hasattr(exc, 'orig') and exc.orig:
            print(f"\nOriginal driver error details:\n{exc.orig}")
            if hasattr(exc.orig, 'pgerror'):
                print(f"PostgreSQL Error Message: {exc.orig.pgerror}")
            if hasattr(exc.orig, 'diag') and hasattr(exc.orig.diag, 'message_detail'):
                print(f"PostgreSQL Error Detail: {exc.orig.diag.message_detail}")

--- Query: Most used word per week for Politician ID = 1 ---


Unnamed: 0,week_start,most_used_word,votes,voters
0,2025-04-14 00:00:00+00:00,insane,15,2
1,2025-04-21 00:00:00+00:00,cruel,10,3
2,2025-04-28 00:00:00+00:00,hateful,5,4
3,2025-05-05 00:00:00+00:00,chaotic,1,1
4,2025-05-12 00:00:00+00:00,dangerous,5,1
5,2025-05-19 00:00:00+00:00,borders,4,1
6,2025-05-26 00:00:00+00:00,russian,15,2
7,2025-06-02 00:00:00+00:00,traitor,20,2
8,2025-06-09 00:00:00+00:00,corrupt,6,1
9,2025-06-16 00:00:00+00:00,brazen,11,2


In [12]:
# Relies on `engine` having been created by the connection cell.

if not engine:
    print("Database engine not available. Please ensure the connection part of the script was run successfully.")
else:
    # Column of `words` that is counted. Presumably it identifies the user who
    # submitted each word; adjust if the schema names it differently
    # (e.g. submitter_id, author_id, created_by_user_id).
    submitter_column_name = 'user_id'

    sql_unique_submitters_query = f"""
    SELECT
        COUNT(DISTINCT {submitter_column_name}) AS total_unique_submitters
    FROM
        words;
    """
    try:
        # Echo the heading and the exact SQL before running it.
        print(
            f"\n--- Query: Total number of unique submitters from the '{submitter_column_name}' column in 'words' table ---",
            sql_unique_submitters_query,
            sep="\n",
        )

        df_submitters = pd.read_sql_query(sql=sql_unique_submitters_query, con=engine)

        # The query yields a single aggregate row; pull the scalar out of it.
        if df_submitters.empty:
            print("\nQuery executed, but no result returned (e.g., the 'words' table might be empty or the column doesn't exist).")
        else:
            total_unique_submitters = df_submitters.iloc[0]['total_unique_submitters']
            print(f"\nTotal number of unique submitters: {total_unique_submitters}")

    except Exception as exc:
        print(f"Error executing query for unique submitters: {exc}")


--- Query: Total number of unique submitters from the 'user_id' column in 'words' table ---

    SELECT
        COUNT(DISTINCT user_id) AS total_unique_submitters
    FROM
        words;
    

Total number of unique submitters: 23


In [13]:
# Relies on `engine` having been created by the connection cell.

if not engine:
    print("Database engine not available. Please run the connection cell first.")
else:
    # Word to look up; bound to the %(word_text)s placeholder in the SQL.
    target_word = 'smart'

    # Column of `words` that is read back. It is spliced into the SQL as an
    # identifier, so keep it a trusted, hard-coded name.
    sentiment_column_name = 'sentiment_score'

    sql_query = f"""
    SELECT
        word,
        {sentiment_column_name}
    FROM
        words
    WHERE
        word = %(word_text)s;
    """
    try:
        # Echo the heading and the exact SQL before running it.
        print(
            f"--- Query: Sentiment value for the word '{target_word}' ---",
            sql_query,
            sep="\n",
        )

        df_query = pd.read_sql_query(sql=sql_query, con=engine, params={'word_text': target_word})

        if df_query.empty:
            print(f"\nNo word found with the name '{target_word}' in the 'words' table.")
        else:
            # Only the first matching row is reported (assumes `word` is unique).
            sentiment = df_query.iloc[0][sentiment_column_name]
            print(f"\nThe sentiment value for '{target_word}' is: {sentiment}")

    except Exception as exc:
        print(f"\nAn error occurred while executing the query: {exc}")
        print(f"Please check that the 'words' table and the sentiment column ('{sentiment_column_name}') exist.")

--- Query: Sentiment value for the word 'smart' ---

    SELECT
        word,
        sentiment_score
    FROM
        words
    WHERE
        word = %(word_text)s;
    

The sentiment value for 'smart' is: 0.4019
