In [11]:
## IMPORTS

# External modules
import pandas as pd
import re
from collections import Counter
import matplotlib.pyplot as plt
import time
import ast
# Models
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import r2_score, mean_absolute_error, precision_score, recall_score, f1_score

# AI
import openai

# Project modules
import filter

In [12]:
## FUNCTIONS


## DATAFRAME CREATION
def get_artworks_df():
    artworks = pd.read_csv('../temporary-files/artsoul_artworks_info.csv')
    artworks.rename(columns=lambda x: x.title(), inplace=True)
    artworks = calculate_area_and_price_per_area(artworks)
    print(artworks.columns)
    for column_name in ['Técnicas', 'Temas', 'Cores']:
    
        artworks[column_name] = artworks[column_name].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else '')
    # for column_name in ['Title']:
    #     artworks[column_name] = artworks[column_name].apply(lambda x: [str(value.strip()) for value in x.split(' ')])
    # Remove outliers (artworks with price / cm² in the 0.5% and 99.5% percentiles)
    artworks = artworks[artworks['Price / cm²'] > artworks['Price / cm²'].quantile(0.05)]
    artworks = artworks[artworks['Price / cm²'] < artworks['Price / cm²'].quantile(0.95)]
    return artworks

def get_artists_df(artworks_df):
    agg_df = artworks_df.dropna(subset=['Price'])
    agg_data = agg_df.groupby('Artist').agg({'Artist': 'count',
                                            'Price': ['mean', lambda x: x.std(skipna=False)],
                                            'Price / cm²': ['mean', lambda x: x.std(skipna=False)],
                                            'Size': ['mean', lambda x: x.std(skipna=False)]})
    agg_data = agg_data.reset_index()

    # Flatten the column names
    agg_data.columns = ['Artist', 'NumArtworks', 'MeanPrice', 'StdPrice', 'MeanPricePerInch', 'StdPricePerInch', 'MeanSize', 'StdSize']

    # Create a new DataFrame with the aggregated data from artworks_info
    artists_from_artworks = pd.DataFrame(agg_data)
    artists_from_artworks = artists_from_artworks[artists_from_artworks['NumArtworks'] >= 3]
    # Get artists data from artists_info
    artists_from_artists = pd.read_json('../temporary-files/saatchi_artists_info_clean.json')
    artists_from_artists.rename(columns=lambda x: x.title(), inplace=True)
    artists_from_artists.rename(columns={'Name': 'Artist'}, inplace=True)

    # New dataframe with all artists that are contained in both dataframes
    artists_from_artists = artists_from_artists[artists_from_artists['Artist'].isin(artists_from_artworks['Artist'])]
    artists = pd.merge(artists_from_artworks, artists_from_artists, on='Artist')

    return artists

def calculate_area_and_price_per_area(dataframe):
    dataframe['Size'] = dataframe['Height'] * dataframe['Width']
    dataframe['Price / cm²'] = dataframe['Price'] / dataframe['Size']
    return dataframe

def get_unique_values(df, column_name):
    # Extract all unique styles from the column
    unique_values = set()
    for styles in df[column_name]:
        if isinstance(styles, str):
            styles_list = [style.strip() for style in styles.split(',')]
            unique_values.update(styles_list)
    
    return unique_values

def check_columns(df, columns_names):
    for column in columns_names:
        df[column] = df[column].str.title()
        unique_list = get_unique_values(df, column)
        print(column, len(unique_list), 'unique values')

def restart_df(dataframe):
    df = calculate_area_and_price_per_area(dataframe)
    df = df[['Styles', 'Mediums', 'Subjects', 'Artist', 'Size', 'Price']]
    df = df.dropna(subset='Styles')
    columns_names = ['Styles', 'Mediums', 'Subjects']

    # Turn the "Styles" column into a list of strings
    for column in columns_names:
        column_serie = df[column].apply(lambda x: x.split(','))
        df[column] = column_serie
        return df

## CLEAN DATA
def remove_words_from_list(lst, words_to_remove):
    return [word for word in lst if word not in words_to_remove]

## OOOOLD
def fix_column(dataframe, column_name, fix_dict, remove_list, split_list):
    # Remove whitespaces from the beginning and end of each string
    dataframe[column_name] = dataframe[column_name].apply(lambda lst: [item.strip() for item in lst])
    # Change '-' to whitespaces in each string
    dataframe[column_name] = dataframe[column_name].apply(lambda lst: [item.replace('-', ' ') for item in lst])
    # Capitalize each string
    dataframe[column_name] = dataframe[column_name].apply(lambda lst: [item.title() for item in lst])
    # Remove ' Art' and ' Painting' from the end of each string
    dataframe[column_name] = dataframe[column_name].apply(lambda lst: [style.replace(' Art', '') for style in lst])
    dataframe[column_name] = dataframe[column_name].apply(lambda lst: [style.replace(' Painting', '') for style in lst])
    # Splits items present the split_list into separate items
    dataframe[column_name] = dataframe[column_name].apply(lambda lst: [item for item in lst if item not in split_list] + [item for item in lst for split_item in split_list if split_item in item])
    return dataframe

## NEEEW
def fix_based_on_dict(column, fix_dict):
    column = column.apply(lambda lst: [next((key for key, values in fix_dict.items() if item in values), item) for item in lst])
    return column

def remove_words_from_column(column, remove_list):
    column = column.apply(lambda lst: remove_words_from_list(lst, remove_list))
    return column

def get_occurrence_count_on_col_dict(values):
    # Get all unique values from the column
    occurrences_counts_dict = dict(Counter(values))
    return occurrences_counts_dict

def group_by_segments(artworks_data, column_name, column, occurrences_threshold):

    ## GETTING ONE DF FOR SEGMENT
    segments_dfs = get_dfs_for_segments(filtered_artworks_data, column_name, occurrence_count_on_col_dict, occurrences_threshold)

    # New dataframe with unique segments as index and MeanPrice, MedianPrice, MeanSize, MedianSize as columns
    all_segments_df = pd.DataFrame(index=segments_dfs.keys(), columns=['MeanPrice', 'MedianPrice', 'MeanSize', 'MedianSize', 'MeanPricePercm²', 'MedianPricePercm²', 'Count'])
    # Populate dataframe with mean price, median price, mean size and median size for each segment
    for key, value in segments_dfs.items():
        all_segments_df.loc[key, 'MeanPrice'] = value['Price'].mean().round(0)
        all_segments_df.loc[key, 'MedianPrice'] = value['Price'].median().round(0)
        all_segments_df.loc[key, 'MeanSize'] = value['Size'].mean().round(0)
        all_segments_df.loc[key, 'MedianSize'] = value['Size'].median().round(0)
        all_segments_df.loc[key, 'MeanPricePercm²'] = value['Price / cm²'].mean().round(2)
        all_segments_df.loc[key, 'MedianPricePercm²'] = value['Price / cm²'].median().round(2)
        all_segments_df.loc[key, 'Count'] = len(value)

    all_segments_df.sort_values(by='MeanPrice', ascending=False, inplace=True)
    
    # New dataframe for each segment
    segments_dfs = {}
    for key, value in occurrence_count_on_col_dict.items():
        if value > occurrences_threshold:
            segments_dfs[key] = dataframe[dataframe[column_name].apply(lambda x: key in x)]
    return segments_dfs

def analyse_by_column(dataframe, column_name, threshold):
    artworks_count_by_segment = dataframe[column_name].value_counts()
    artworks_count_pct_by_segment = artworks_count_by_segment / dataframe[column_name].value_counts().sum()
    # filter out segments with less than [threshold] artworks
    threshold = 200
    selection = artworks_count_by_segment[artworks_count_by_segment > threshold].index
    dataframe = dataframe[dataframe[column_name].isin(selection)]
    return dataframe

def compare_segments(dataframe, segments_to_compare, x_column_name, y_column_name):
    for segment in segments_to_compare:
        print(segment)

        if segment == 'All':
            segment_df = dataframe
        else:
            segment_df = segments_dfs[segment]

        x = segment_df[[x_column_name]]
        y = segment_df[y_column_name]
        print('stats:', get_stats(segment_df, x, y))
        get_all_models(x, y)

        xlim = (0, 10000)
        ylim = (0, 40000)
        # segment_df.plot.scatter(x=segment_df[[x_column_name]], y=segment_df[y_column_name], title=segment+' artworks', figsize=(5, 3), xlim=xlim, ylim=ylim)

def segment_and_clean_data(artworks_data, column_name, occurrences_threshold):
    column = artworks_data[column_name]
    segments_in_column_list = [value for sublist in column for value in sublist]
    occurrence_count_on_col_dict = get_occurrence_count_on_col_dict(segments_in_column_list)
    
    filtered_artworks_data = remove_empty_rows(artworks_data, column)
    filtered_artworks_data = filtered_artworks_data.dropna(subset=['Price', 'Size'])
    filtered_artworks_data[column_name] = column
    
    segments_dfs = get_dataframes_for_segments(filtered_artworks_data, column_name, occurrence_count_on_col_dict, occurrences_threshold)
    all_segments_df = create_segments_dataframe(segments_dfs)
    
    return filtered_artworks_data, all_segments_df, segments_dfs

def remove_empty_rows(dataframe, column):
    return dataframe[column.apply(lambda x: len(x) > 0)]

def get_dataframes_for_segments(dataframe, column_name, occurrence_count_on_col_dict, occurrences_threshold):
    segments_dfs = {}
    for key, value in occurrence_count_on_col_dict.items():
        if value > occurrences_threshold:
            segments_dfs[key] = dataframe[dataframe[column_name].apply(lambda x: key in x)]
    return segments_dfs

def create_segments_dataframe(segments_dfs):
    all_segments_df = pd.DataFrame(index=segments_dfs.keys(),
                                   columns=['MeanPrice', 'MedianPrice', 'MeanSize', 'MedianSize',
                                            'MeanPricePercm²', 'MedianPricePercm²', 'Count'])
    for key, value in segments_dfs.items():
        all_segments_df.loc[key, 'MeanPrice'] = value['Price'].mean().round(0)
        all_segments_df.loc[key, 'MedianPrice'] = value['Price'].median().round(0)
        all_segments_df.loc[key, 'MeanSize'] = value['Size'].mean().round(0)
        all_segments_df.loc[key, 'MedianSize'] = value['Size'].median().round(0)
        all_segments_df.loc[key, 'MeanPricePercm²'] = value['Price / cm²'].mean().round(2)
        all_segments_df.loc[key, 'MedianPricePercm²'] = value['Price / cm²'].median().round(2)
        all_segments_df.loc[key, 'Count'] = len(value)
    all_segments_df.sort_values(by='MeanPrice', ascending=False, inplace=True)
    return all_segments_df

def prepare_dataframe_dummies(artworks_data, column_name, segments_dfs):
    dummies_for_segment = artworks_data[['Price', column_name]].dropna(subset=['Price'])
    
    for key, value in segments_dfs.items():
        dummies_for_segment[key] = artworks_data[column_name].apply(lambda x: True if key in x else False)
    
    return dummies_for_segment

In [13]:
## MODELS


def get_stats(x, y):
    stats = {}
    for variable in [x, y]:
        max_variable = round(max(variable), 2)
        min_variable = round(min(variable), 2)
        mean_variable = round(variable.mean(), 2)
        median_variable = round(variable.median(), 2)
        stats_variable = {'Max': max_variable, 'Min': min_variable, 'Mean': mean_variable, 'Median': median_variable}
        # add stats_variable to stats
        stats[variable] = stats_variable
    return stats


## GET MODELS

def get_decision_tree(X_train, y_train):
    decision_tree = DecisionTreeRegressor(random_state=42)
    decision_tree.fit(X_train, y_train)
    return decision_tree

def get_linear_regression(X_train, y_train):
    linear_regression = LinearRegression()
    linear_regression.fit(X_train, y_train)
    return linear_regression

def get_random_forest(X_train, y_train):
    rf_model = RandomForestRegressor(random_state=1)
    rf_model.fit(X_train, y_train)
    return rf_model

def get_gradient_boosting(X_train, y_train):
    # get gradient boosting model
    gb_model = GradientBoostingRegressor(random_state=1)
    gb_model.fit(X_train, y_train)
    return gb_model

# gives a dataframe taking models as columns and score as rows
def get_all_models(x, y):
    x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

    models = {'Linear regression': get_linear_regression(x_train, y_train),
              'Decision tree': get_decision_tree(x_train, y_train),
              'Random forest': get_random_forest(x_train, y_train),
              'Gradient boosting': get_gradient_boosting(x_train, y_train)}
    models_df = pd.DataFrame(columns=models.keys())
    for model_name, model in models.items():
        y_pred = model.predict(x_test)
        r2 = r2_score(y_test, y_pred)
        n = len(y_test)
        p = x_test.shape[1]
        adj_r2 = 1 - (1 - r2) * (n - 1) / (n - p - 1)
        mean_error = mean_absolute_error(y_test, y_pred)

        models_df.loc['score', model_name] = r2
        models_df.loc['adjusted score', model_name] = adj_r2
        models_df.loc['mean error', model_name] = mean_error

    return models_df, models

In [14]:

# Load the data
artworks_df = get_artworks_df()
artworks_df


Index(['Url', 'Price', 'Artist', 'Artist_Url', 'Title', 'Description',
       'Height', 'Width', 'Depth', 'Location', 'Year', 'Técnicas', 'Temas',
       'Cores', 'Gallery', 'Image_Url', 'Size', 'Price / cm²'],
      dtype='object')


Unnamed: 0,Url,Price,Artist,Artist_Url,Title,Description,Height,Width,Depth,Location,Year,Técnicas,Temas,Cores,Gallery,Image_Url,Size,Price / cm²
0,https://artsoul.com.br/obras/mosca,1200.0,Marcia C. Cavalcanti,https://artsoul.com.br/artistas/marcia-c-caval...,Mosca,Pintura - óleo s/ tela,30.0,40.0,,RIO DE JANEIRO,2018.0,[Pintura],"[Contemporâneo, Figurativo, Pop, Moderno, Conc...",[Colorido],Zagut,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,1200.0,1.000000
1,https://artsoul.com.br/obras/sem-titulo-1501,3500.0,Fabrício Lopez,https://artsoul.com.br/artistas/fabricio-lopez,Sem título,"sem título, monotipia, PU (prova única), 2007,...",57.0,53.0,,SÃO PAULO,2007.0,[Gravura],"[Contemporâneo, Abstrato]",[Colorido],Gravura Brasileira,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3021.0,1.158557
2,https://artsoul.com.br/obras/bye-bye-dubai,2300.0,Francisco Maringelli,https://artsoul.com.br/artistas/francisco-mari...,"Bye, bye Dubai","Francisco Maringelli, xilogravura de fio sobre...",50.0,70.0,,SÃO PAULO,2010.0,,,,Gravura Brasileira,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3500.0,0.657143
3,https://artsoul.com.br/obras/on-my-way,23000.0,Anna Paola Protasio,https://artsoul.com.br/artistas/anna-paola-pro...,On My Way,Alumínio e grampos,103.0,144.0,,SÃO PAULO,2019.0,[Técnica Mista],[Contemporâneo],[Monocromático],Arteformatto,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,14832.0,1.550701
4,https://artsoul.com.br/obras/sem-titulo-2320,1950.0,ADRIANA BRZEZINSKA,https://artsoul.com.br/artistas/adriana-brzezi...,Sem título,Mista sobre tela,70.0,50.0,5.0,CURITIBA,2019.0,[Outra Técnica],[Contemporâneo],,Zilda Fraletti,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3500.0,0.557143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,https://artsoul.com.br/obras/lagoa,8800.0,Mario Baptista,https://artsoul.com.br/artistas/mario-baptista,Lagoa,Impressão em jato de tinta sobre papel algodão...,120.0,80.0,,SÃO PAULO,2021.0,[Fotografia],"[Contemporâneo, Paisagem]",[Colorido],Mario Baptista,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,9600.0,0.916667
373,https://artsoul.com.br/obras/quarentena-2-1,1897.0,Rose Aguiar,https://artsoul.com.br/artistas/rose-aguiar,Quarentena 2,Fotografia digital impressa em papel Canson 25...,72.0,54.0,,RIO DE JANEIRO,2020.0,[Fotografia],"[Abstrato, Contemporâneo, Conceitual, Outro Te...",[Monocromático],Zagut,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3888.0,0.487912
375,https://artsoul.com.br/obras/s-titulo-81,9000.0,Lu Guedes,https://artsoul.com.br/artistas/lu-guedes,s/ título,Acrílica s/tela - s/ moldura,58.0,58.0,,RIO DE JANEIRO,2018.0,[Técnica Mista],"[Outro Tema, Abstrato, Contemporâneo]",[Colorido],Zagut,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3364.0,2.675386
377,https://artsoul.com.br/obras/da-serie-pequenos...,1000.0,Helena Freddi,https://artsoul.com.br/artistas/helena-freddi,da série Pequenos Vícios,"Da série Pequenos Vícios, gravura em metal e m...",20.0,20.0,,SÃO PAULO,2010.0,[Gravura],"[Figurativo, Contemporâneo]",[Colorido],Gravura Brasileira,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,400.0,2.500000


In [15]:
artworks_df['Marketplace'] = 'Artsoul'
# save artworks_df as csv file
artworks_df.to_csv('../temporary-files/artsoul_artworks_info.csv', index=False)

In [16]:
# unique values on column
column = 'Temas'
artworks_df

Unnamed: 0,Url,Price,Artist,Artist_Url,Title,Description,Height,Width,Depth,Location,Year,Técnicas,Temas,Cores,Gallery,Image_Url,Size,Price / cm²,Marketplace
0,https://artsoul.com.br/obras/mosca,1200.0,Marcia C. Cavalcanti,https://artsoul.com.br/artistas/marcia-c-caval...,Mosca,Pintura - óleo s/ tela,30.0,40.0,,RIO DE JANEIRO,2018.0,[Pintura],"[Contemporâneo, Figurativo, Pop, Moderno, Conc...",[Colorido],Zagut,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,1200.0,1.000000,Artsoul
1,https://artsoul.com.br/obras/sem-titulo-1501,3500.0,Fabrício Lopez,https://artsoul.com.br/artistas/fabricio-lopez,Sem título,"sem título, monotipia, PU (prova única), 2007,...",57.0,53.0,,SÃO PAULO,2007.0,[Gravura],"[Contemporâneo, Abstrato]",[Colorido],Gravura Brasileira,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3021.0,1.158557,Artsoul
2,https://artsoul.com.br/obras/bye-bye-dubai,2300.0,Francisco Maringelli,https://artsoul.com.br/artistas/francisco-mari...,"Bye, bye Dubai","Francisco Maringelli, xilogravura de fio sobre...",50.0,70.0,,SÃO PAULO,2010.0,,,,Gravura Brasileira,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3500.0,0.657143,Artsoul
3,https://artsoul.com.br/obras/on-my-way,23000.0,Anna Paola Protasio,https://artsoul.com.br/artistas/anna-paola-pro...,On My Way,Alumínio e grampos,103.0,144.0,,SÃO PAULO,2019.0,[Técnica Mista],[Contemporâneo],[Monocromático],Arteformatto,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,14832.0,1.550701,Artsoul
4,https://artsoul.com.br/obras/sem-titulo-2320,1950.0,ADRIANA BRZEZINSKA,https://artsoul.com.br/artistas/adriana-brzezi...,Sem título,Mista sobre tela,70.0,50.0,5.0,CURITIBA,2019.0,[Outra Técnica],[Contemporâneo],,Zilda Fraletti,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3500.0,0.557143,Artsoul
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,https://artsoul.com.br/obras/lagoa,8800.0,Mario Baptista,https://artsoul.com.br/artistas/mario-baptista,Lagoa,Impressão em jato de tinta sobre papel algodão...,120.0,80.0,,SÃO PAULO,2021.0,[Fotografia],"[Contemporâneo, Paisagem]",[Colorido],Mario Baptista,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,9600.0,0.916667,Artsoul
373,https://artsoul.com.br/obras/quarentena-2-1,1897.0,Rose Aguiar,https://artsoul.com.br/artistas/rose-aguiar,Quarentena 2,Fotografia digital impressa em papel Canson 25...,72.0,54.0,,RIO DE JANEIRO,2020.0,[Fotografia],"[Abstrato, Contemporâneo, Conceitual, Outro Te...",[Monocromático],Zagut,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3888.0,0.487912,Artsoul
375,https://artsoul.com.br/obras/s-titulo-81,9000.0,Lu Guedes,https://artsoul.com.br/artistas/lu-guedes,s/ título,Acrílica s/tela - s/ moldura,58.0,58.0,,RIO DE JANEIRO,2018.0,[Técnica Mista],"[Outro Tema, Abstrato, Contemporâneo]",[Colorido],Zagut,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,3364.0,2.675386,Artsoul
377,https://artsoul.com.br/obras/da-serie-pequenos...,1000.0,Helena Freddi,https://artsoul.com.br/artistas/helena-freddi,da série Pequenos Vícios,"Da série Pequenos Vícios, gravura em metal e m...",20.0,20.0,,SÃO PAULO,2010.0,[Gravura],"[Figurativo, Contemporâneo]",[Colorido],Gravura Brasileira,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,400.0,2.500000,Artsoul


In [17]:
## MODEL FOR SIZE, STYLES, MEDIUMS, MATERIALS AND SUBJECTS

dummies_for_all_segments = pd.DataFrame()

columns_names = ['Temas', 'Técnicas', 'Cores']

# get one df with dummies for styles, mediums, materials and subjects
for column_name in columns_names:
    column = artworks_df[column_name]
    occurrences_threshold = 0
    filtered_artworks_data, all_segments_df, segments_dfs = segment_and_clean_data(artworks_df, column_name, occurrences_threshold)
    dummies_for_segment = prepare_dataframe_dummies(artworks_df, column_name, segments_dfs).drop(columns=['Price'])
    # concat dummies_for_segment to dummies_for_all_segments
    dummies_for_all_segments = pd.concat([dummies_for_all_segments, dummies_for_segment], axis=1)

dummies_for_all_segments.drop(columns=columns_names, inplace=True)

# add price, size, country
columns_to_add = ['Size', 'Price']
for column_name in columns_to_add:
    dummies_for_all_segments[column_name] = artworks_df[column_name]

# apply models to dummies_for_all_segments
x = dummies_for_all_segments.drop(columns=['Price'])
y = dummies_for_all_segments['Price']

models_output = get_all_models(x.values, y)
models_df = models_output[0]
models = models_output[1]
models_df

Unnamed: 0,Linear regression,Decision tree,Random forest,Gradient boosting
score,0.463293,0.631921,0.656174,0.570079
adjusted score,0.132133,0.404809,0.444026,0.304809
mean error,2828.670272,2024.415584,2265.032338,2315.343734


In [24]:
# add columns from artworks: ['Artist', 'Title', 'Image_URL', 'Marketplace']
df = dummies_for_all_segments
columns_to_add = ['Artist', 'Price', 'Title', 'Image_Url', 'Marketplace']
for column in columns_to_add:
    df[column] = artworks_df[column]

df.to_csv('artsoul_artworks_info.csv', index=False)
df

Unnamed: 0,Contemporâneo,Figurativo,Pop,Moderno,Conceitual,Abstrato,Retrato,Surrealista,Outro Tema,Paisagem,...,Fotografia,Livro,Colorido,Monocromático,Size,Price,Artist,Title,Image_Url,Marketplace
0,True,True,True,True,True,False,False,False,False,False,...,False,False,True,False,1200.0,1200.0,Marcia C. Cavalcanti,Mosca,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul
1,True,False,False,False,False,True,False,False,False,False,...,False,False,True,False,3021.0,3500.0,Fabrício Lopez,Sem título,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,3500.0,2300.0,Francisco Maringelli,"Bye, bye Dubai",https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul
3,True,False,False,False,False,False,False,False,False,False,...,False,False,False,True,14832.0,23000.0,Anna Paola Protasio,On My Way,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul
4,True,False,False,False,False,False,False,False,False,False,...,False,False,False,False,3500.0,1950.0,ADRIANA BRZEZINSKA,Sem título,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
372,True,False,False,False,False,False,False,False,False,True,...,True,False,True,False,9600.0,8800.0,Mario Baptista,Lagoa,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul
373,True,False,False,False,True,True,False,False,True,True,...,True,False,False,True,3888.0,1897.0,Rose Aguiar,Quarentena 2,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul
375,True,False,False,False,False,True,False,False,True,False,...,False,False,True,False,3364.0,9000.0,Lu Guedes,s/ título,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul
377,True,True,False,False,False,False,False,False,False,False,...,False,False,True,False,400.0,1000.0,Helena Freddi,da série Pequenos Vícios,https://artsoul.nyc3.cdn.digitaloceanspaces.co...,Artsoul


In [19]:
x.columns
x.to_csv('./models/artsoul_x.csv', index=False)
y.to_csv('./models/artsoul_y.csv', index=False)

In [20]:
for segment_df in segments_dfs:
    print(segment_df)

Colorido
Monocromático
