In [25]:
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load dataset
df = pd.read_csv("MTA_Permanent_Art_Catalog__Beginning_1980_20250927.csv")
df.head()

Unnamed: 0,Agency,Station Name,Line,Artist,Art Title,Art Date,Art Material,Art Description,Art Image Link
0,NYCT,Clark St,23,Ray Ring,Clark Street Passage,1987,Terrazzo floor tile,The first model that Brooklyn-born artist Ray ...,https://new.mta.info/agency/arts-design/collec...
1,NYCT,125 St,456,Houston Conwill,The Open Secret,1986,Bronze - polychromed,"The Open Secret, in the 125th Street and Lexin...",https://new.mta.info/agency/arts-design/collec...
2,NYCT,Astor Pl,6,Milton Glaser,Untitled,1986,Porcelain enamel murals,"Milton Glaser, best known for his work in grap...",https://new.mta.info/agency/arts-design/collec...
3,NYCT,Kings Hwy,"B,Q",Rhoda Andors,Kings Highway Hieroglyphs,1987,Porcelain Enamel Murals on Steel,The artist discusses her work: ÒIf public art...,https://new.mta.info/agency/arts-design/collec...
4,NYCT,Newkirk Av,"B,Q",David Wilson,Transit Skylight,1988,Zinc-glazed Apolycarbonate skylight,"The artist recalls, ÒAbout the same time that ...",https://new.mta.info/agency/arts-design/collec...


In [26]:
df = df.dropna()  # drop rows with any missing values

df = df.drop_duplicates(subset= ["Station Name"]) # drop duplicate rows

# Drop unwanted columns
df = df.drop(columns=["Art Description", "Art Image Link"])

# Apply strip + lowercase to all object (string) columns
for col in df.select_dtypes(include="object").columns:
    df[col] = df[col].astype(str).str.strip().str.lower()
df.head()

Unnamed: 0,Agency,Station Name,Line,Artist,Art Title,Art Date,Art Material
0,nyct,clark st,23,ray ring,clark street passage,1987,terrazzo floor tile
1,nyct,125 st,456,houston conwill,the open secret,1986,bronze - polychromed
2,nyct,astor pl,6,milton glaser,untitled,1986,porcelain enamel murals
3,nyct,kings hwy,"b,q",rhoda andors,kings highway hieroglyphs,1987,porcelain enamel murals on steel
4,nyct,newkirk av,"b,q",david wilson,transit skylight,1988,zinc-glazed apolycarbonate skylight


In [27]:
# Make tidy: split multiple lines into separate rows
df = df.assign(Line=df['Line'].str.split(',')).explode('Line').reset_index(drop=True)
print(df.info)

<bound method DataFrame.info of     Agency               Station Name Line              Artist  \
0     nyct                   clark st    2            ray ring   
1     nyct                   clark st    3            ray ring   
2     nyct                     125 st    4     houston conwill   
3     nyct                     125 st    5     houston conwill   
4     nyct                     125 st    6     houston conwill   
..     ...                        ...  ...                 ...   
531   nyct  14 st-6 av and 14 st-7 av    m      fred tomaselli   
532   nyct  14 st-6 av and 14 st-7 av    1      fred tomaselli   
533   nyct  14 st-6 av and 14 st-7 av    2      fred tomaselli   
534   nyct  14 st-6 av and 14 st-7 av    3      fred tomaselli   
535   nyct       68 st-hunter college    6  lisa corinne davis   

                                 Art Title  Art Date          Art Material  
0                     clark street passage      1987   terrazzo floor tile  
1                    

In [28]:
print(df.head)

<bound method NDFrame.head of     Agency               Station Name Line              Artist  \
0     nyct                   clark st    2            ray ring   
1     nyct                   clark st    3            ray ring   
2     nyct                     125 st    4     houston conwill   
3     nyct                     125 st    5     houston conwill   
4     nyct                     125 st    6     houston conwill   
..     ...                        ...  ...                 ...   
531   nyct  14 st-6 av and 14 st-7 av    m      fred tomaselli   
532   nyct  14 st-6 av and 14 st-7 av    1      fred tomaselli   
533   nyct  14 st-6 av and 14 st-7 av    2      fred tomaselli   
534   nyct  14 st-6 av and 14 st-7 av    3      fred tomaselli   
535   nyct       68 st-hunter college    6  lisa corinne davis   

                                 Art Title  Art Date          Art Material  
0                     clark street passage      1987   terrazzo floor tile  
1                     c