In [14]:
import pandas as pd
import psycopg2
from configparser import ConfigParser

## __About__
Starting from the audited and verified tables created in the prior files, we now further sanitize and standardize them (and potentially combine them) so that they are analysis-ready by the end.

In [15]:
'''
Copied my config from my 'database-upload' file for ease-of-access.
'''

def config(filename="database.ini", section="postgresql"): # keeping filename for portability, changing later
    """Open a PostgreSQL connection using settings stored in an .ini file.

    Parameters
    ----------
    filename : str
        Path of the .ini file to read.
    section : str
        Name of the section whose key/value pairs are passed to
        ``psycopg2.connect`` as keyword arguments.

    Returns
    -------
    The connection object returned by ``psycopg2.connect``. The caller is
    responsible for closing it.

    Raises
    ------
    FileNotFoundError
        If ``filename`` could not be read at all.
    Exception
        If ``section`` is not present in the file. Errors raised while
        connecting are reported with a message and then re-raised unchanged.
    """
    parser = ConfigParser() # creating parser

    # ConfigParser.read() skips files it cannot open and returns the list of
    # files it actually parsed, so an empty result means nothing was loaded.
    if not parser.read(filename):
        raise FileNotFoundError("Config file {0} could not be read".format(filename))

    if not parser.has_section(section): # checking if a config section exists
        raise Exception("Section {0} not found in the {1} file".format(section, filename))

    db = dict(parser.items(section)) # every setting in the section, as connect() keyword arguments

    try:
        conn = psycopg2.connect(**db) # connecting to the db by unpacking the dictionary
    except Exception: # not a bare except: let KeyboardInterrupt/SystemExit pass through untouched
        print("Database not connected successfully.")
        raise

    print("Database connected successfully.")
    return conn

In [16]:
# NOTE(review): this is an absolute, machine-specific path; consider a path
# relative to the repository (or an environment variable) so the notebook
# runs on other machines.
conn = config(filename=r"C:\Users\Jjoer\GitHub\Pokemon Stocks\database.ini") # connecting to db

# NOTE(review): pandas documents SQLAlchemy connectables (and sqlite3) for
# read_sql_query; a raw psycopg2 connection may emit a UserWarning.
try:
    df_cards = pd.read_sql_query("SELECT * FROM cards;", conn)
    df_prices = pd.read_sql_query("SELECT * FROM prices;", conn)
finally:
    conn.close() # closing the connection, even if one of the queries fails

Database connected successfully.


  df_cards = pd.read_sql_query("SELECT * FROM cards;", conn)
  df_prices = pd.read_sql_query("SELECT * FROM prices;", conn)


---

### *Revisiting old files*
Repeating the whole audit-and-verify process by hand would be redundant, so we'll wrap those steps in functions to condense it.

##### 'Cards' Table:

In [17]:
def audit_and_verify_cards_table(cards=None):
    """Return a cleaned copy of the cards table.

    Missing values in ``artist`` and ``rarity`` are replaced with the
    placeholders "Unknown Artist" and "Unknown Rarity", and the ``subtypes``
    column is dropped. The input frame is not modified.

    Parameters
    ----------
    cards : pandas.DataFrame, optional
        Frame to clean. When omitted, the notebook-level ``df_cards`` is used,
        which keeps existing zero-argument calls working.

    Returns
    -------
    pandas.DataFrame
        A new frame with the placeholders filled in and ``subtypes`` removed.

    Raises
    ------
    KeyError
        If the frame has no ``subtypes`` column to drop.
    """
    if cards is None:
        cards = df_cards # fall back to the frame loaded from the database

    replacement_values = {"artist": "Unknown Artist", "rarity": "Unknown Rarity"}
    # fillna() and drop() both return new frames, so `cards` is left untouched
    return cards.fillna(value=replacement_values).drop(columns=['subtypes'])

# Build the cleaned cards frame, then preview its first 10 rows.
df_cards2 = audit_and_verify_cards_table()
df_cards2.head(10)

Unnamed: 0,card_id,name,supertype,set_name,series,card_number,printed_total,artist,rarity
0,hgss4-1,Aggron,Pokémon,HS—Triumphant,HeartGold & SoulSilver,1,102,Kagemaru Himeno,Rare Holo
1,xy5-1,Weedle,Pokémon,Primal Clash,XY,1,160,Midori Harada,Common
2,pl1-1,Ampharos,Pokémon,Platinum,Platinum,1,127,Atsuko Nishida,Rare Holo
3,dp3-1,Ampharos,Pokémon,Secret Wonders,Diamond & Pearl,1,132,Kouki Saitou,Rare Holo
4,det1-1,Bulbasaur,Pokémon,Detective Pikachu,Sun & Moon,1,18,MPC Film,Common
5,dv1-1,Dratini,Pokémon,Dragon Vault,Black & White,1,20,Masakazu Fukuda,Rare Holo
6,mcd19-1,Caterpie,Pokémon,McDonald's Collection 2019,Other,1,12,Sekio,Unknown Rarity
7,pl3-1,Absol G,Pokémon,Supreme Victors,Platinum,1,147,Yusuke Ishikawa,Rare Holo
8,ex12-1,Aerodactyl,Pokémon,Legend Maker,EX,1,92,Hajime Kusajima,Rare Holo
9,ex3-1,Absol,Pokémon,Dragon,EX,1,97,Naoyo Kimura,Rare Holo


In [18]:
# Check column dtypes and non-null counts of the cleaned cards frame.
df_cards2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19818 entries, 0 to 19817
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   card_id        19818 non-null  object
 1   name           19818 non-null  object
 2   supertype      19818 non-null  object
 3   set_name       19818 non-null  object
 4   series         19818 non-null  object
 5   card_number    19818 non-null  object
 6   printed_total  19818 non-null  object
 7   artist         19818 non-null  object
 8   rarity         19818 non-null  object
dtypes: object(9)
memory usage: 1.4+ MB


---

#### 'Prices' Table

In [19]:
def audit_and_verify_prices_table(prices=None):
    """Return a cleaned copy of the prices table.

    Missing ``market_price`` values are filled from ``mid_price`` on the same
    row, and the ``source``, ``condition_txt``, ``raw_json`` and ``created_at``
    columns are dropped. The input frame is not modified, so re-running the
    cell always starts from the raw data.

    Parameters
    ----------
    prices : pandas.DataFrame, optional
        Frame to clean. When omitted, the notebook-level ``df_prices`` is used,
        which keeps existing zero-argument calls working.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``market_price`` filled and the four columns removed.

    Raises
    ------
    KeyError
        If any of the referenced columns is missing from the frame.
    """
    if prices is None:
        prices = df_prices # fall back to the frame loaded from the database

    # assign() builds a new frame instead of writing the filled column back
    # into the raw table; filling all missing 'market_price' with mid_price
    # for accuracy's sake
    filled_prices = prices.assign(
        market_price=prices['market_price'].fillna(prices['mid_price'])
    )
    return filled_prices.drop(columns=["source", "condition_txt", "raw_json", "created_at"])

# Build the cleaned prices frame, then preview its first 10 rows.
df_prices2 = audit_and_verify_prices_table()
df_prices2.head(10)

Unnamed: 0,price_id,card_id,variant,updated_at,market_price,low_price,mid_price,high_price
0,1116,dp5-19,reverseHolofoil,2025-10-16,14.83,7.78,10.28,22.06
1,1117,sm5-28,normal,2025-10-16,0.19,0.09,0.21,2.0
2,1118,sm5-28,reverseHolofoil,2025-10-16,0.47,0.25,0.41,1.49
3,1119,np-18,holofoil,2025-10-16,14.27,8.01,11.26,18.49
4,1120,sm75-26,reverseHolofoil,2025-10-16,3.06,2.0,2.99,4.0
5,1121,sm75-26,normal,2025-10-16,0.56,0.27,0.66,4.5
6,1122,gym2-29,1stEdition,2025-10-16,163.73,99.99,199.99,299.99
7,1123,gym2-29,unlimited,2025-10-16,83.46,61.0,81.49,560.96
8,1124,xy5-26,normal,2025-10-16,3.23,0.54,2.0,4.88
9,1125,xy5-26,reverseHolofoil,2025-10-16,11.42,2.96,15.61,49.99
