In [108]:
%pip install -r ../requirements.txt

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.2 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [109]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import webbrowser
import base64
import io
import logging


In [110]:
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [111]:
logger.info(f"DataFrame info:\n{df.info()}")
logger.info(f"DataFrame head:\n{df.head()}")
logger.info(f"Unique profiles: {df['profile'].unique()}")

INFO:__main__:DataFrame info:
None
INFO:__main__:DataFrame head:
   profile   name surname  birth_date        dni                        email  \
0  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
1  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
2  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
3  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
4  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   

   password                    iban   assets           timestamp    city  \
0    185417  ES44209556455181134018  3547.89 2022-01-01 18:52:16  Bilbao   
1    185417  ES44209556455181134018  3547.89 2022-01-01 09:45:07  Bilbao   
2    185417  ES44209556455181134018  3547.89 2022-01-02 12:56:04  Bilbao   
3    185417  ES44209556455181134018  3547.89 2022-01-02 12:10:17  Bilbao   
4    185417  ES44209556455181134018  3547.89 2022-01-02 17:31:06  Bi

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9158 entries, 0 to 9157
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   profile     9158 non-null   object        
 1   name        9158 non-null   object        
 2   surname     9158 non-null   object        
 3   birth_date  9158 non-null   object        
 4   dni         9158 non-null   object        
 5   email       9158 non-null   object        
 6   password    9158 non-null   int64         
 7   iban        9158 non-null   object        
 8   assets      9158 non-null   float64       
 9   timestamp   9158 non-null   datetime64[ns]
 10  city        9158 non-null   object        
 11  type        9158 non-null   object        
 12  category    9158 non-null   object        
 13  amount      9158 non-null   float64       
 14  balance     9158 non-null   float64       
dtypes: datetime64[ns](1), float64(3), int64(1), object(10)
memory usage: 1.0

In [112]:
def load_most_recent_csv(folder_path):
    # Use glob to find all CSV files in the specified folder
    csv_files = glob.glob(os.path.join(folder_path, '*.csv'))
    
    # Get the most recent file based on the modification time
    if csv_files:
        latest_file = max(csv_files, key=os.path.getmtime)
        print(f"Loading the most recent file: {latest_file}")
        return pd.read_csv(latest_file)
    else:
        print("No CSV files found in the specified folder.")
        return None


In [113]:
folder_path = '../data'
data = load_most_recent_csv(folder_path)


Loading the most recent file: ../data\data_bank_trx.csv


In [114]:
print(data.head())


   profile   name surname  birth_date        dni                        email  \
0  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
1  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
2  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
3  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
4  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   

   password                    iban   assets            timestamp    city  \
0    185417  ES44209556455181134018  3547.89  2022-01-01T18:52:16  Bilbao   
1    185417  ES44209556455181134018  3547.89  2022-01-01T09:45:07  Bilbao   
2    185417  ES44209556455181134018  3547.89  2022-01-02T12:56:04  Bilbao   
3    185417  ES44209556455181134018  3547.89  2022-01-02T12:10:17  Bilbao   
4    185417  ES44209556455181134018  3547.89  2022-01-02T17:31:06  Bilbao   

       type    category   amount  balance  
0   in

In [115]:
df = data.copy()

In [116]:
# Check if 'timestamp' and 'balance' columns exist
if 'timestamp' not in df.columns or 'balance' not in df.columns:
    logger.error("Required columns 'timestamp' or 'balance' not found in the DataFrame")
    raise ValueError("Missing required columns in DataFrame")

# Ensure 'timestamp' is in datetime format
if not pd.api.types.is_datetime64_any_dtype(df['timestamp']):
    logger.info("Converting 'timestamp' to datetime")
    df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')

# Check for any NaT values in timestamp after conversion
if df['timestamp'].isna().any():
    logger.warning("NaT values found in 'timestamp' column after conversion")

INFO:__main__:Converting 'timestamp' to datetime


In [117]:
image_base64_list = []
for profile in df['profile'].unique():
    try:
        profile_data = df[df['profile'] == profile]
        logger.info(f"Generating graph for profile: {profile}")
        logger.info(f"Profile data shape: {profile_data.shape}")
        logger.info(f"Profile data head:\n{profile_data.head()}")
        
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.lineplot(x='timestamp', y='balance', data=profile_data, marker='o', ax=ax)
        
        ax.set_title(f"Balance Evolution for Profile: {profile}", fontsize=14)
        ax.set_xlabel('Date', fontsize=12)
        ax.set_ylabel('Balance (€)', fontsize=12)
        plt.xticks(rotation=45)
        
        # Save plot to a bytes buffer
        buffer = io.BytesIO()
        plt.savefig(buffer, format='png', facecolor='#1E1E1E', edgecolor='none')
        buffer.seek(0)
        
        # Encode the bytes as base64
        image_base64 = base64.b64encode(buffer.getvalue()).decode()
        image_base64_list.append((profile, image_base64))
        
        plt.close(fig)
        logger.info(f"Successfully generated graph for profile: {profile}")
    except Exception as e:
        logger.error(f"Error generating graph for profile {profile}: {str(e)}", exc_info=True)

INFO:__main__:Generating graph for profile: buyer_1
INFO:__main__:Profile data shape: (1173, 15)
INFO:__main__:Profile data head:
   profile   name surname  birth_date        dni                        email  \
0  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
1  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
2  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
3  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   
4  buyer_1  Aitor   López  1996-08-10  17033061C  aitor.lópez@awesomecode.net   

   password                    iban   assets           timestamp    city  \
0    185417  ES44209556455181134018  3547.89 2022-01-01 18:52:16  Bilbao   
1    185417  ES44209556455181134018  3547.89 2022-01-01 09:45:07  Bilbao   
2    185417  ES44209556455181134018  3547.89 2022-01-02 12:56:04  Bilbao   
3    185417  ES44209556455181134018  3547.89 2022-01-02 12:10:17  Bilbao   
4  

In [118]:
html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Balance Evolution for Profiles</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background-color: #1E1E1E;
            color: #FFFFFF;
            line-height: 1.6;
            padding: 20px;
            margin: 0;
        }
        .container {
            max-width: 80%;
            margin: 0 auto;
            text-align: center;
        }
        h1 {
            color: #BB86FC;
            margin-bottom: 30px;
            font-size: 2.5em;
        }
        h2 {
            color: #03DAC6;
            margin-top: 40px;
            font-size: 1.8em;
        }
        img {
            max-width: 100%;
            height: auto;
            border-radius: 8px;
            box-shadow: 0 4px 8px rgba(0,0,0,0.5);
            margin-bottom: 20px;
        }
        .profile-section {
            background-color: #2C2C2C;
            border-radius: 10px;
            padding: 20px;
            margin-bottom: 30px;
        }
        .error-message {
            color: #CF6679;
            font-style: italic;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Balance Evolution for Profiles</h1>
"""

if not image_base64_list:
    html_content += "<p class='error-message'>No graphs were generated. Please check the error logs.</p>"
else:
    for profile, image_base64 in image_base64_list:
        html_content += f"""
            <div class="profile-section">
                <h2>Profile: {profile}</h2>
                <img src="data:image/png;base64,{image_base64}" alt="Balance evolution for {profile}">
            </div>
        """

html_content += """
    </div>
</body>
</html>
"""

In [119]:
output_dir = '../data_report'
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'profile_balance_report.html')

try:
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    logger.info(f"HTML report saved successfully to {output_file}")
except Exception as e:
    logger.error(f"Error saving HTML report: {str(e)}")


INFO:__main__:HTML report saved successfully to ../data_report\profile_balance_report.html


In [120]:
try:
    webbrowser.open('file://' + os.path.realpath(output_file))
    logger.info("Opened HTML report in default web browser")
except Exception as e:
    logger.error(f"Error opening HTML report in browser: {str(e)}")

print("Please check the console for detailed logs and error messages.")

INFO:__main__:Opened HTML report in default web browser


Please check the console for detailed logs and error messages.
