# Objetos de cobre en el Reino Antiguo

In [None]:
# Importar las librerías y módulos necesarios para el trabajo

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

def distrib_cat(df, columnas_categoricas, relativa=False, mostrar_valores=False):
    """Draw one bar chart per categorical column on a two-column grid.

    Args:
        df: DataFrame holding the columns to plot.
        columnas_categoricas: Names of the columns whose value frequencies
            are drawn, one subplot per column.
        relativa: When true, plot each value's share of the counted
            observations instead of its raw count.
        mostrar_valores: When true, write each bar's height (two decimals)
            just above the bar.

    Returns:
        None. The figure is displayed with ``plt.show()``. Nothing is drawn
        when ``columnas_categoricas`` is empty.
    """
    columnas = list(columnas_categoricas)
    if not columnas:
        # A grid with zero rows cannot be created, so there is nothing to show.
        return

    num_filas = -(-len(columnas) // 2)  # ceiling division: two plots per row

    # squeeze=False keeps the axes in a 2-D array whatever the grid size.
    _, axes = plt.subplots(num_filas, 2, figsize=(15, 5 * num_filas), squeeze=False)
    axes = axes.flatten()

    for ax, col in zip(axes, columnas):
        # normalize=True divides every count by the total of the counts.
        serie = df[col].value_counts(normalize=bool(relativa))
        sns.barplot(x=serie.index, y=serie, ax=ax, palette='viridis', hue=serie.index, legend=False)
        ax.set_ylabel('Frecuencia Relativa' if relativa else 'Frecuencia')

        ax.set_title(f'Distribución de {col}')
        ax.set_xlabel('')
        ax.tick_params(axis='x', rotation=45)

        if mostrar_valores:
            for p in ax.patches:
                height = p.get_height()
                ax.annotate(f'{height:.2f}', (p.get_x() + p.get_width() / 2., height),
                            ha='center', va='center', xytext=(0, 9), textcoords='offset points')

    # Hide the trailing subplot left over when the number of columns is odd.
    for ax in axes[len(columnas):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

def plot_multiple_boxplots(df, columns, dim_matriz_visual = 2):
    """Draw a box plot for every numeric column on a grid of subplots.

    Args:
        df: DataFrame holding the data.
        columns: Column names to consider. Columns that are not numeric
            (and boolean columns) are skipped and take no slot in the grid.
        dim_matriz_visual: Number of subplots per row.

    Returns:
        None. The figure is displayed with ``plt.show()``. Nothing is drawn
        when no numeric column is left to plot.

    Raises:
        ValueError: If ``dim_matriz_visual`` is smaller than 1.
    """
    if dim_matriz_visual < 1:
        raise ValueError("dim_matriz_visual must be a positive integer")

    # Accept every numeric dtype (not only int64/float64) but leave booleans out.
    numeric_columns = [
        column for column in columns
        if pd.api.types.is_numeric_dtype(df[column])
        and not pd.api.types.is_bool_dtype(df[column])
    ]
    if not numeric_columns:
        return

    # Ceiling division so the grid has just enough rows for any grid width.
    num_rows = -(-len(numeric_columns) // dim_matriz_visual)

    # squeeze=False keeps the axes in a 2-D array even for a 1x1 grid.
    _, axes = plt.subplots(num_rows, dim_matriz_visual, figsize=(12, 6 * num_rows), squeeze=False)
    axes = axes.flatten()

    for ax, column in zip(axes, numeric_columns):
        sns.boxplot(data=df, x=column, ax=ax)
        ax.set_title(column)

    # Hide the unused axes at the end of the grid.
    for ax in axes[len(numeric_columns):]:
        ax.axis('off')

    plt.tight_layout()
    plt.show()

def plot_boxplot_grouped(df, column_to_plot, group_column):
    """Show a box plot of ``column_to_plot`` split by ``group_column``.

    The plot is drawn only when the plotted column has dtype ``int64`` or
    ``float64`` and the grouping column has dtype ``object`` or ``category``;
    otherwise the function returns without doing anything.
    """
    value_dtype = df[column_to_plot].dtype
    if value_dtype not in ['int64', 'float64']:
        return

    # The grouping column is only inspected once the value column qualifies.
    group_dtype = df[group_column].dtype
    if group_dtype not in ['object', 'category']:
        return

    sns.boxplot(data=df, x=group_column, y=column_to_plot)
    plt.show()

def variabilidad(df):
    """Return the standard deviation, mean and their ratio per described column.

    The result has one row per column reported by ``df.describe()`` and the
    columns ``std``, ``mean`` and ``CV`` (``std`` divided by ``mean``).
    """
    estadisticos = df.describe()
    tabla = estadisticos.loc[["std", "mean"]].transpose()
    tabla["CV"] = tabla["std"] / tabla["mean"]
    return tabla

def _draw_category_bars(data, cat_col1, cat_col2, title, y_label, rotation, show_values, order=None):
    """Draw and show one grouped bar chart of ``count`` by ``cat_col1`` and ``cat_col2``."""
    plt.figure(figsize=(10, 6))
    ax = sns.barplot(x=cat_col1, y='count', hue=cat_col2, data=data, order=order)

    plt.title(title)
    plt.xlabel(cat_col1)
    plt.ylabel(y_label)
    plt.xticks(rotation=rotation)

    if show_values:
        for p in ax.patches:
            height = p.get_height()
            if np.isnan(height):
                # No value to print for a bar without a height.
                continue
            # Fixed 5-point offset above the bar, independent of the group size.
            ax.annotate(f'{height:.2f}', (p.get_x() + p.get_width() / 2., height),
                        ha='center', va='center', fontsize=10, color='black', xytext=(0, 5),
                        textcoords='offset points')

    plt.show()


def plot_categorical_relationship_fin(df, cat_col1, cat_col2, relative_freq=False, show_values=False, size_group = 5):
    """Plot how the values of ``cat_col2`` are distributed within each value of ``cat_col1``.

    Bars are grouped by ``cat_col1`` on the x axis and coloured by
    ``cat_col2``. When ``cat_col1`` has more than ``size_group`` distinct
    values, the categories are split over several figures of at most
    ``size_group`` categories each.

    Args:
        df: DataFrame holding both columns.
        cat_col1: Column shown on the x axis.
        cat_col2: Column used as hue.
        relative_freq: When true, each count is divided by the number of
            rows having the same ``cat_col1`` value.
        show_values: When true, write each bar's height above the bar.
        size_group: Maximum number of ``cat_col1`` categories per figure.

    Returns:
        None. Every figure is displayed with ``plt.show()``.

    Raises:
        ValueError: If ``size_group`` is smaller than 1.
    """
    if size_group < 1:
        raise ValueError("size_group must be a positive integer")

    # Count the rows for every (cat_col1, cat_col2) pair.
    count_data = df.groupby([cat_col1, cat_col2]).size().reset_index(name='count')

    if relative_freq:
        total_counts = df[cat_col1].value_counts()
        count_data['count'] = count_data.apply(lambda x: x['count'] / total_counts[x[cat_col1]], axis=1)

    y_label = 'Frecuencia' if relative_freq else 'Conteo'

    # Missing values are dropped so they do not take a category slot in the chunks.
    unique_categories = df[cat_col1].dropna().unique()

    if len(unique_categories) <= size_group:
        _draw_category_bars(count_data, cat_col1, cat_col2,
                            f'Relación entre {cat_col1} y {cat_col2}',
                            y_label, 45, show_values)
        return

    num_plots = int(np.ceil(len(unique_categories) / size_group))
    for i in range(num_plots):
        # Take the next chunk of categories and the rows that belong to it.
        categories_subset = unique_categories[i * size_group:(i + 1) * size_group]
        data_subset = count_data[count_data[cat_col1].isin(categories_subset)]
        _draw_category_bars(data_subset, cat_col1, cat_col2,
                            f'Relación entre {cat_col1} y {cat_col2} - Grupo {i + 1}',
                            y_label, 90, show_values, order=categories_subset)

In [None]:
# 1. Clean the data
# Load the data with pandas: one Excel workbook per class of artefact,
# each read into its own DataFrame.
# NOTE(review): the bare names after each load only produce output when they
# are the last expression of a notebook cell - confirm the previews are wanted.

df_adzes = pd.read_excel("/Users/bastiensegalas/Desktop/EDA/data/adzes.xlsx")
df_adzes

df_axes = pd.read_excel("/Users/bastiensegalas/Desktop/EDA/data/axes.xlsx")
df_axes

df_chisels= pd.read_excel("/Users/bastiensegalas/Desktop/EDA/data/chisels.xlsx")
df_chisels

df_mirrors = pd.read_excel("/Users/bastiensegalas/Desktop/EDA/data/mirrors.xlsx")
df_mirrors

df_saws = pd.read_excel("/Users/bastiensegalas/Desktop/EDA/data/saws.xlsx")
df_saws

df_needles = pd.read_excel("/Users/bastiensegalas/Desktop/EDA/data/needles.xlsx")
df_needles

In [None]:
# 2. Keep the columns of interest for each class of object.
# Every selection holds the class ID, the context columns (site, region,
# period, type of context), Category, Type, the class-specific measurements
# and Weight.

df_adzes_new = df_adzes[['adzeID', 'Context_artefacts::Site', 'Category', 'Context_artefacts::Region', 'Context_artefacts::Period', 'Context_artefacts::Type of context',
                         'Type', 'Length of artefact', 'Width of artefact', 'Thickness of artefact', 'Lower width of butt', 'Width of butt', 'Thickness of butt',
                         'Lower thickness of butt', 'Thickness of blade', 'Lower width of blade', 'Width of blade', 'Lower thickness of blade', 'Weight']]

df_adzes_new

# 'Heigth' is spelled as written here; the selection needs the exact column label.
df_axes_new = df_axes[['axeID', 'Context_artefacts::Site', 'Context_artefacts::Region', 'Category', 'Context_artefacts::Period', 'Context_artefacts::Type of context',
                       'Type', 'Heigth of artefact', 'Maximal width', 'Maximal thickness', 'Width of blade', 'Thickness of blade', 'Weight']]

df_axes_new

df_chisels_new = df_chisels[['chiselID', 'Context_artefacts::Site', 'Context_artefacts::Region', 'Category', 'Context_artefacts::Period', 
                             'Context_artefacts::Type of context', 'Type', 'Length of chisel', 'Thickness of blade', 'Width of blade', 'Maximal width',
                             'Maximal thickness', 'Weight']]

df_chisels_new

df_mirrors_new = df_mirrors[['mirrorID', 'Context_artefacts::Site', 'Context_artefacts::Region', 'Category', 'Context_artefacts::Period',
                             'Context_artefacts::Type of context', 'Type', 'Heigth of artefact', 'Width of artefact', 'Thickness of artefact', 'Heigth of disc',
                             'Width of disc', 'Thickness of disc', 'Weight']]

df_mirrors_new

df_saws_new = df_saws[['sawID', 'Context_artefacts::Site', 'Context_artefacts::Region', 'Context_artefacts::Period', 'Type', 'Context_artefacts::Type of context',
                       'Category', 'Length of artefact', 'Width of artefact', 'Maximal thickness','Length of blade', 'Width of blade', 'Thickness of blade', 'Weight']]

df_saws_new

df_needles_new = df_needles[['needleID', 'Context_artefacts::Site', 'Context_artefacts::Region', 'Category', 'Context_artefacts::Period',
                             'Context_artefacts::Type of context', 'Type', 'Length of artefact', 'Maximal width', 'Maximal thickness', 'Width of head', 
                             'Thickness of head', 'Diameter of shaft', 'Weight']]

df_needles_new

In [None]:
# 3. Export the result to CSV: one file per class, named in Spanish
# (azuelas = adzes, hachas = axes, cinceles = chisels, espejos = mirrors,
# sierras = saws, agujas = needles). index=False leaves the row index out.

df_adzes_new.to_csv("/Users/bastiensegalas/Desktop/EDA/data/procesado/azuelas.csv", index=False)
df_axes_new.to_csv("/Users/bastiensegalas/Desktop/EDA/data/procesado/hachas.csv", index=False)
df_chisels_new.to_csv("/Users/bastiensegalas/Desktop/EDA/data/procesado/cinceles.csv", index=False)
df_mirrors_new.to_csv("/Users/bastiensegalas/Desktop/EDA/data/procesado/espejos.csv", index=False)
df_saws_new.to_csv("/Users/bastiensegalas/Desktop/EDA/data/procesado/sierras.csv", index=False)
df_needles_new.to_csv("/Users/bastiensegalas/Desktop/EDA/data/procesado/agujas.csv", index=False)

In [None]:
# 4. Define the cardinality and the type classification of the data of each class
# (done here for the adzes file).

df_azuelas = pd.read_csv("/Users/bastiensegalas/Desktop/EDA/data/procesado/azuelas.csv")
df_azuelas

# One row per column of df_azuelas: number of distinct values ("Card"), that
# number as a percentage of the row count ("%_Card") and the dtype ("Tipo").
df_tipi_azuelas = pd.DataFrame([df_azuelas.nunique(), df_azuelas.nunique()/len(df_azuelas) * 100, df_azuelas.dtypes]).\
    T.rename(columns = {0: "Card", 1: "%_Card", 2: "Tipo"})

df_tipi_azuelas

# Start from a default label, then apply the rules in order; a later rule
# overwrites the earlier labels wherever its condition holds.
df_tipi_azuelas["Clasificada_como"] = "Categorica"
df_tipi_azuelas.loc[df_tipi_azuelas.Card == 2, "Clasificada_como"] = "Binaria"
df_tipi_azuelas.loc[df_tipi_azuelas["Card"] > 10, "Clasificada_como"] ="Numerica Discreta"
df_tipi_azuelas.loc[df_tipi_azuelas["%_Card"] > 30, "Clasificada_como"] = "Numerica Continua"
df_tipi_azuelas

In [None]:
# 5. Plot the frequencies of the categorical columns (absolute counts,
# since distrib_cat is called without relativa=True).

cat_azuelas = ['Context_artefacts::Site', 'Category', 'Context_artefacts::Region', 'Context_artefacts::Period', 'Context_artefacts::Type of context', 'Type']
distrib_cat(df_azuelas, cat_azuelas)

In [None]:
# 6. Position measures: box plots of the adze measurements listed below.

num_azuelas = ['Length of artefact', 'Width of artefact', 'Thickness of artefact', 'Width of butt', 'Thickness of butt', 'Thickness of blade',
               'Width of blade', 'Weight']

plot_multiple_boxplots(df_azuelas, num_azuelas)

In [None]:
# 7. Check the variability of the data: std, mean and their ratio (CV)
# for the columns summarised by describe().

variabilidad(df_azuelas)

In [None]:
# 8. Relate the geographic distribution (site) to the chronological one
# (period), printing the value of each bar.

plot_categorical_relationship_fin(df_azuelas, 'Context_artefacts::Site','Context_artefacts::Period', show_values= True)