In [2]:
import os
from sqlalchemy import create_engine
from dotenv import load_dotenv
import pandas as pd

In [3]:
load_dotenv()

pg_user = os.getenv('PG_USER')
pg_password = os.getenv('PG_PASSWORD')
pg_host = os.getenv('PG_HOST')
pg_port = os.getenv('PG_PORT')
pg_db = os.getenv('PG_GPU_DB')

pg_conn_str = f"postgresql+psycopg2://{pg_user}:{pg_password}@{pg_host}:{pg_port}/{pg_db}"
pg_engine = create_engine(pg_conn_str)

Descriptive Analytics

Business Question: What is the average GPU price for different levels of memory size and clock speed?

In [5]:
sql_query = '''
WITH joined_specs AS (
    SELECT
        api."productName",
        api."memSize",
        api."gpuClock",
        web."MSRP"
    FROM sql_project.gpu_specs_raw api
    JOIN sql_project.gpu_hierarchy_web_raw web
        ON api."productName" = web."Graphics Card"
    WHERE web."MSRP" IS NOT NULL
)
SELECT
    CAST(js."memSize" AS NUMERIC) AS mem_size_gb,
    js."gpuClock" AS gpu_clock_mhz,
    AVG(CAST(REGEXP_REPLACE(js."MSRP", '[$,]', '', 'g') AS NUMERIC)) AS avg_price_usd
FROM joined_specs js
GROUP BY mem_size_gb, gpu_clock_mhz
ORDER BY avg_price_usd DESC;
'''
df = pd.read_sql_query(sql_query, con=pg_engine)
print(df)

    mem_size_gb  gpu_clock_mhz  avg_price_usd
0          28.0            900         2000.0
1          24.0            975         1600.0
2          24.0            810         1000.0
3          12.0           1312          800.0
4          20.0           1065          750.0
5          16.0            900          750.0
6          12.0           1245          550.0
7          12.0           1825          550.0
8          12.0           1290          400.0
9           8.0            650          330.0
10         16.0           1825          330.0
11          8.0           1200          300.0
12          8.0           1140          270.0


Insights:
-highest avg price is $2000 for GPU with 28GB of memory
-higher memory tends to call for higher pricing, not so much with clock speed
-price drops for all GPUs with 8GB of memory

Recommendations:
-Look into other specs to crack down which are best predictors for pricing
-find a good balance of specs; i.e. higher memory and prices for premium users and lower memory and prices for budget users
-possibly drop the 8GB product line if prices continue to fall

Predictions:
-increase prices as GPU memory gets up to and surpasses 16GB, increasing overall profitability
-increasing the minimum to 12GB means dropping 8GB and avoiding potential losses in profit

In [None]:
pd.set_option('display.max_rows', None)
display(df)

Diagnostic Analytics

Business Question: Which specs included in our GPUs can generate higher sale prices?

In [None]:
sql_query = '''
WITH joined_specs AS (
    SELECT
        api."productName",
        api."releaseYear",
        api."memSize",
        api."gpuClock",
        web."MSRP",
        api."unifiedShader",
        api."rop",
        api."tmu"
    FROM sql_project.gpu_specs_raw api
    JOIN sql_project.gpu_hierarchy_web_raw web
        ON api."productName" = web."Graphics Card"
    WHERE web."MSRP" IS NOT NULL
),
ranked_specs AS (
    SELECT *,
           AVG(CAST(REGEXP_REPLACE(js."MSRP", '[$,]', '', 'g') AS NUMERIC)) OVER() AS overall_avg_price,
           RANK() OVER (
                ORDER BY CAST(REGEXP_REPLACE(js."MSRP", '[$,]', '', 'g') AS NUMERIC) DESC
           ) AS price_rank
    FROM joined_specs js
)
SELECT
    rs."productName",
    rs."releaseYear",
    rs.price_rank,
    rs."memSize",
    rs."gpuClock",
    rs."unifiedShader",
    rs."rop",
    rs."tmu",
    rs."MSRP",
    rs.overall_avg_price,
    CAST(REGEXP_REPLACE(rs."MSRP", '[$,]', '', 'g') AS NUMERIC) - rs.overall_avg_price AS price_above_avg
FROM ranked_specs rs
WHERE CAST(REGEXP_REPLACE(rs."MSRP", '[$,]', '', 'g') AS NUMERIC) > rs.overall_avg_price
ORDER BY price_above_avg DESC
LIMIT 15;
'''
df = pd.read_sql_query(sql_query, con=pg_engine)
print(df)

           productName  releaseYear  price_rank  memSize  gpuClock  \
0     GeForce RTX 5090       2025.0           1     28.0       900   
1     GeForce RTX 4090       2022.0           2     24.0       975   
2     GeForce RTX 5080       2025.0           3     16.0       900   
3   Radeon RX 7900 XTX       2022.0           3     24.0       810   
4  GeForce RTX 4070 Ti       2023.0           5     12.0      1312   
5    Radeon RX 7900 XT       2022.0           6     20.0      1065   

   unifiedShader  rop  tmu    MSRP  overall_avg_price  price_above_avg  
0         8192.0  128  256  $2,000         741.428571      1258.571429  
1         7168.0   80  224  $1,600         741.428571       858.571429  
2         6912.0  192  432  $1,000         741.428571       258.571429  
3         7424.0   96  232  $1,000         741.428571       258.571429  
4         5120.0  128  320    $800         741.428571        58.571429  
5         6912.0  160  432    $750         741.428571         8.571429 

Insights:
-Good predictors: memory size, rop, release year
-Poor predictors: gpu clock, tmu, unified shader
-memory size and release year appears to be best prodictor for highest prices
-GeForce RTX 5090 has the highest price above avg at 1258
-Has more GPUs in top priced GPUs than AMD

Recomendations:
-when focusing on high end products, prioritize increasing memory size and rop to drive the highest priced sales
-Understand AMDs business model as to why our prices are much higher (are they going for a more budget friendly approach?)

Predictions:
-higher memory sizes and rop would lead to more profit in sales
-Continue to develop these specs to stay competitive and have higher prices than AMD creating more premium customer loyalty
-similarly, continue to develop lower end specs to compete with AMD's possible budget friendly approach to generate more sales