In [19]:
import os
import pandas as pd

def load_and_combine_csvs(folder_path):
    """Read every ``.csv`` file directly inside ``folder_path`` into one DataFrame.

    Files are read in sorted filename order so that the row order of the
    result is the same on every run (``os.listdir`` returns entries in
    arbitrary order).

    Parameters
    ----------
    folder_path : str or path-like
        Directory to scan. Sub-directories are not searched, and directory
        entries whose name happens to end in ``.csv`` are skipped.

    Returns
    -------
    pandas.DataFrame
        Rows of all files stacked vertically with a fresh 0..n-1 index.
        Columns are the union of the files' columns in first-seen order;
        values for columns a file does not have are NaN.

    Raises
    ------
    ValueError
        If the folder contains no ``.csv`` files.
    """
    csv_files = sorted(
        name
        for name in os.listdir(folder_path)
        if name.endswith('.csv') and os.path.isfile(os.path.join(folder_path, name))
    )

    # Fail with an actionable message instead of pandas' generic
    # "No objects to concatenate" (same exception type as before).
    if not csv_files:
        raise ValueError(f"No .csv files found in {folder_path!r}")

    frames = [pd.read_csv(os.path.join(folder_path, name)) for name in csv_files]

    # ignore_index avoids repeated per-file row labels; sort=False keeps the
    # columns in first-seen order rather than alphabetical.
    return pd.concat(frames, axis=0, ignore_index=True, sort=False)

# Directory holding the scraped CSV exports (relative to the notebook).
folder = 'raw_data'

# Keep the freshly loaded frame untouched and do all cleaning on a copy,
# so the raw data can be inspected again without re-reading the files.
combined_dataframe = load_and_combine_csvs(folder_path=folder)
df = combined_dataframe.copy(deep=True)

df


Unnamed: 0,id,name,description,price_sek,price_eur,category,brand,country,volume,alcohol_percentage,...,producer,year,volume_detail,alcohol_detail,detailed_description,detailed_price,image_urls,food_pairing.1,character.1,grape_variety.1
0,108779,Bottega Amarone della Valpolicella,"Amarone is a full bodied, balanced and pleasan...",29.95,,,Bottega,,,,...,,,,,,,,,,
1,108556,Bottega Cabernet delle Venezie IGT,Cabernet Sauvignon Igt delle Venezie is a powe...,7.50,,,Bottega,,,,...,,,,,,,,,,
2,108843,Bottega Chianti Classico D.O.C.G.,,14.50,,,Bottega,,,,...,,,,,,,,,,
3,118584,Bottega Collio Pinot Grigio,,12.50,,,Bottega,,,,...,,,,,,,,,,
4,118035,Bottega Pinot Grigio delle Venezie I.G.T.,Pinot Grigio Igt delle Venezie is a dry wine w...,8.50,,,Bottega,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
422,108529,Tommasi Graticcio Appassionato BIB,"Kryddig smak med inslag av fat, körsbär, kanel...",20.95,,,Tommasi Viticoltori,,,,...,,,,,,,,,,
423,109700,Tommasi La Prunee - Merlot,"Medelfyllig, medelhöga tanniner, kryddig, bärig",10.95,,,Tommasi Viticoltori,,,,...,,,,,,,,,,
424,118540,Tommasi Le Fornaci - Lugana DOC,"Torr, medelhög syra, grapefruktskaraktär, fruktig",12.95,,,Tommasi Viticoltori,,,,...,,,,,,,,,,
425,118534,Tommasi Le Volpare Soave Classico,En blek halmgul färg med lysande gröna toner. ...,9.95,,,Tommasi Viticoltori,,,,...,,,,,,,,,,


In [20]:
df.info()

# Names of the columns that contain at least one missing value.
nan_columns = df.columns[df.isna().any()].tolist()
print("Columns with NaN values:")
# A bare `nan_columns` expression here is never displayed: IPython only
# echoes the last expression of a cell, and this cell ends with an assignment.
print(nan_columns)

# Drop columns that are empty in every row; they carry no information.
df = df.dropna(axis=1, how='all')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 427 entries, 0 to 426
Data columns (total 29 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    427 non-null    int64  
 1   name                  427 non-null    object 
 2   description           345 non-null    object 
 3   price_sek             427 non-null    float64
 4   price_eur             0 non-null      float64
 5   category              0 non-null      float64
 6   brand                 427 non-null    object 
 7   country               0 non-null      float64
 8   volume                0 non-null      float64
 9   alcohol_percentage    0 non-null      float64
 10  image_url             427 non-null    object 
 11  in_stock              0 non-null      float64
 12  article_number        0 non-null      float64
 13  detailed_title        20 non-null     object 
 14  food_pairing          314 non-null    object 
 15  character             3

In [None]:
# Index rows by product id. Guarded so that re-running this cell does not
# raise KeyError once 'id' has already been moved into the index.
if df.index.name != 'id':
    df = df.set_index('id')

# NOTE(review): this only relabels the column; no currency conversion is
# applied. Confirm the scraped values really are EUR amounts.
df = df.rename(columns={'price_sek': 'price_eur'})

# Fixed seed so the preview shows the same rows on every run; capped so the
# call cannot fail on a frame with fewer than 15 rows.
df.sample(n=min(15, len(df)), random_state=0)


Unnamed: 0_level_0,name,description,price_eur,brand,image_url,detailed_title,food_pairing,character,grape_variety,food_pairing.1,character.1,grape_variety.1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
107831,Norton Privada,"Fyllig, höga tanniner, toner av mörka körsbär,...",13.5,Norton,/owif/images/107831?size=300,,Nöt Grillat Gris,Fylligt,Cabernet Sauvignon Merlot Malbec,,,
109498,Black Cottage Pinot Noir,"Intensiv doft av körsbär, jordgubbe och hallon...",16.5,Black Cottage Wines,/owif/images/109498?size=300,,,,,"Skaldjur Kyckling, kalkon Vilt Ostar",Medelfylligt,Pinot Noir
110183,Trivento Reserve Malbec,Djupt röd med violetta nyanser. Intensiva arom...,7.95,Trivento,/owif/images/110183?size=300,,Grillat Biff Gris,Fylligt,Malbec,,,
110176,Ocho dos dos,Ett Riberavin med karaktär. Ljus röd färg med ...,19.95,Juvé & Camps,/owif/images/110176?size=300,,Nöt Lamm Vilt,Fylligt,Tempranillo,,,
109888,Ramos Reserva BIB,"Dark, blue-red color. Fruity taste with hints ...",19.95,J. Portugal Ramos Vinhos,/owif/images/109888?size=300,,Lamm Pasta Grillat,Fylligt,Syrah Aragones Trincadeira,,,
117011,Torres Viña Esmeralda,"Torr, medelhög syra, citruskaraktär, persikaka...",8.5,Miguel Torres Spain Winery,/owif/images/117011?size=300,,"Asiatiskt Buffé Aperitif, sällskapsvin",Halvtorrt,Gewurtstraminer Moscatel,,,
119216,Leitz 4 Friends Riesling,"NOSE: Fruity nose with peach, apricot and appl...",10.5,Leitz,/owif/images/119216?size=300,,"Sallader, vegetariskt Fiskrätter Aperitif, säl...",Torrt,Riesling,,,
119599,Riley's Rows Chardonnay,,39.95,Riley's Rows,/owif/images/119599?size=300,,"Kyckling, kalkon Fiskrätter Gris",Torrt,,,,
127280,Zinzula Masseria Altemura,"En fantastisk torr, fruktig och frisk rosé på ...",11.5,Masseria Altemura,/owif/images/127280?size=300,,"Skaldjur Fiskrätter Aperitif, sällskapsvin",Torrt,Negroamaro,,,
110054,Baron de Ley Gran Reserva Magnum,"Vinet är behagligt rubinrött, vid glaskanten n...",49.95,Baron de Ley,/owif/images/110054?size=300,Pardon Our Interruption,,,,Nöt Lamm Grillat,Fylligt,Tempranillo


In [22]:
# Rows that still have a missing value in at least one column.
nan_rows = df.loc[df.isna().any(axis='columns')]
nan_rows

Unnamed: 0_level_0,name,description,price_eur,brand,image_url,detailed_title,food_pairing,character,grape_variety,food_pairing.1,character.1,grape_variety.1
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
108779,Bottega Amarone della Valpolicella,"Amarone is a full bodied, balanced and pleasan...",29.95,Bottega,/owif/images/108779?size=300,,,,,,,
108556,Bottega Cabernet delle Venezie IGT,Cabernet Sauvignon Igt delle Venezie is a powe...,7.50,Bottega,/owif/images/108556?size=300,,,,,,,
108843,Bottega Chianti Classico D.O.C.G.,,14.50,Bottega,/owif/images/108843?size=300,,,,,,,
118584,Bottega Collio Pinot Grigio,,12.50,Bottega,/owif/images/118584?size=300,,,,,,,
118035,Bottega Pinot Grigio delle Venezie I.G.T.,Pinot Grigio Igt delle Venezie is a dry wine w...,8.50,Bottega,/owif/images/118035?size=300,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
108529,Tommasi Graticcio Appassionato BIB,"Kryddig smak med inslag av fat, körsbär, kanel...",20.95,Tommasi Viticoltori,/owif/images/108529?size=300,,Nöt Lamm Grillat,Fylligt,,,,
109700,Tommasi La Prunee - Merlot,"Medelfyllig, medelhöga tanniner, kryddig, bärig",10.95,Tommasi Viticoltori,/owif/images/109700?size=300,,"Nöt Kyckling, kalkon Gris Buffé",Medelfylligt,Merlot,,,
118540,Tommasi Le Fornaci - Lugana DOC,"Torr, medelhög syra, grapefruktskaraktär, fruktig",12.95,Tommasi Viticoltori,/owif/images/118540?size=300,,"Sallader, vegetariskt Skaldjur Fiskrätter",Torrt,Turbiana,,,
118534,Tommasi Le Volpare Soave Classico,En blek halmgul färg med lysande gröna toner. ...,9.95,Tommasi Viticoltori,/owif/images/118534?size=300,,Skaldjur Fiskrätter Sushi,Torrt,Garganega,,,
