# In [26]:
import pandas as pd
import numpy as np

import psycopg
from typing import Any, Callable, Optional
import os

# Connection settings passed as keyword arguments to psycopg.connect().
# Each value can be overridden through an environment variable so that
# credentials do not have to be edited into (or committed with) the source.
# The fallbacks are the values this file previously hard-coded; they are
# kept only so existing local setups continue to work unchanged.
CONN_INFO = {
    "dbname": os.environ.get("PERSONALITY_DB_NAME", "personality"),
    "user": os.environ.get("PERSONALITY_DB_USER", "admin"),
    "password": os.environ.get(
        "PERSONALITY_DB_PASSWORD", "top_secret_password_db_personality"
    ),
    "host": os.environ.get("PERSONALITY_DB_HOST", "localhost"),
    # Environment values are strings; keep the port an int as before.
    "port": int(os.environ.get("PERSONALITY_DB_PORT", "5433")),
}

def execute_query(query: str, params: Optional[dict] = None) -> list[tuple]:
    """Run *query* on a fresh connection and return every row it produces.

    A new connection is opened from ``CONN_INFO`` for each call.

    Args:
        query: SQL text. Values should be supplied through psycopg named
            placeholders (``%(name)s``) rather than string formatting.
        params: Mapping of placeholder names to values, or ``None`` when the
            query takes no parameters.

    Returns:
        All rows fetched from the cursor, or an empty list when the statement
        does not produce a result set.

    Raises:
        psycopg.Error: Propagated unchanged from connecting or executing.
    """
    # The connection context manager closes the connection when the block
    # exits, so no explicit conn.close() is needed afterwards.
    with psycopg.connect(**CONN_INFO) as conn:
        with conn.cursor() as cursor:
            cursor.execute(query, params)
            # description is None when the statement returned no result set;
            # calling fetchall() in that state would raise.
            if cursor.description is None:
                return []
            return cursor.fetchall()

# In [47]:
# Pearson correlation between each personality trait and the ratings given to
# movies of each genre, computed entirely in SQL. The final SELECT returns the
# 5 highest and the 5 lowest coefficients, sorted by coefficient descending.
query = """
WITH PearsonCoefficients AS (
    SELECT
        g.genre,
        p.trait AS personality_type,
        (
            (COUNT(*) * SUM(rating * p.value) - SUM(rating) * SUM(p.value))
            /
            -- NULLIF: a group whose ratings or trait values are all equal
            -- has a zero denominator; yield NULL instead of aborting the
            -- whole query with a division-by-zero error.
            NULLIF(
                SQRT(COUNT(*) * SUM(rating * rating) - SUM(rating) * SUM(rating))
                * SQRT(COUNT(*) * SUM(p.value * p.value) - SUM(p.value) * SUM(p.value)),
                0
            )
        ) AS pearson_coeff
    FROM users u
    JOIN ratings r ON r.user_id = u.user_id
    JOIN movies m ON r.movie_id = m.movie_id
    JOIN movie_genre mg ON mg.movie_id = m.movie_id
    JOIN genres g ON g.genre_id = mg.genre_id
    JOIN (
        -- Unpivot the five trait columns into (user_id, trait, value) rows.
        SELECT user_id, 'openness' AS trait, openness AS value FROM users
        UNION ALL
        SELECT user_id, 'agreeableness', agreeableness FROM users
        UNION ALL
        SELECT user_id, 'emotional_stability', emotional_stability FROM users
        UNION ALL
        SELECT user_id, 'conscientiousness', conscientiousness FROM users
        UNION ALL
        SELECT user_id, 'extraversion', extraversion FROM users
    ) p ON u.user_id = p.user_id
    GROUP BY g.genre, p.trait
),
DefinedCoefficients AS (
    -- Undefined (NULL) coefficients would otherwise sort first under
    -- ORDER BY ... DESC and crowd out the real top results.
    SELECT * FROM PearsonCoefficients WHERE pearson_coeff IS NOT NULL
)

SELECT * FROM (
    (SELECT * FROM DefinedCoefficients ORDER BY pearson_coeff DESC LIMIT 5)
    UNION ALL
    (SELECT * FROM DefinedCoefficients ORDER BY pearson_coeff ASC LIMIT 5)
) AS CombinedResults
ORDER BY pearson_coeff DESC;
"""

params = {}

results = execute_query(query, params)

# Rows are (genre, personality_type, pearson_coeff), sorted by coefficient
# descending: the first five are the highest, the remainder the lowest.
# This split relies on the query returning 5 + 5 rows.
positive_results = [(genre, trait) for genre, trait, _coeff in results[:5]]
negative_results = [(genre, trait) for genre, trait, _coeff in results[5:]]

# Plain dict payload. The original wrapped this in an undefined `jsonify` and
# used `return` at module level, which is a SyntaxError outside a function.
response = {'positive_correlations': positive_results,
            'negative_correlations': negative_results}

# Last expression of the cell, so the notebook displays the raw rows.
results

# Output:
# [('IMAX', 'agreeableness', Decimal('0.1202913985514026398897')),
#  ('Children', 'agreeableness', Decimal('0.1153144637177246065352')),
#  ('Animation', 'agreeableness', Decimal('0.1080497799219568641606')),
#  ('Adventure', 'agreeableness', Decimal('0.0946745039190321448027')),
#  ('Fantasy', 'agreeableness', Decimal('0.0928493914926192773162')),
#  ('Animation', 'conscientiousness', Decimal('-0.0309753602771566697890')),
#  ('Documentary', 'conscientiousness', Decimal('-0.0313560996690476285496')),
#  ('Horror', 'conscientiousness', Decimal('-0.0317291625429159791529')),
#  ('Western', 'conscientiousness', Decimal('-0.0326193464859985990712')),
#  ('Film-Noir', 'conscientiousness', Decimal('-0.057196987942644134599400'))]