1. Importação de Bibliotecas

In [3]:
import os
from io import BytesIO

import boto3
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Global plot styling: seaborn "whitegrid" theme and a 12x6 default figure size
sns.set_style("whitegrid")
plt.rcParams.update({'figure.figsize': (12, 6)})

2. Conexão com o MinIO e Carregamento dos Dados

In [4]:
# MinIO connection settings.
# The credentials are read from the environment so real secrets never have to
# be saved inside the notebook; the fallbacks are the values this notebook
# originally hardcoded, so an unchanged local setup keeps working.
MINIO_ENDPOINT = "127.0.0.1:9000"
MINIO_ACCESS_KEY = os.environ.get("MINIO_ACCESS_KEY", "minioadmin")  # your MinIO user
MINIO_SECRET_KEY = os.environ.get("MINIO_SECRET_KEY", "minioadmin")  # your MinIO password
BUCKET_NAME = "student-stress"
OBJECT_NAME = "StressLevelDataset.csv"

# S3-compatible client pointed at the MinIO endpoint (plain HTTP, SigV4 signing)
s3_client = boto3.client(
    "s3",
    endpoint_url=f"http://{MINIO_ENDPOINT}",
    aws_access_key_id=MINIO_ACCESS_KEY,
    aws_secret_access_key=MINIO_SECRET_KEY,
    aws_session_token=None,
    config=boto3.session.Config(signature_version="s3v4"),
)

# Fetch the object and parse it into a DataFrame.
try:
    response = s3_client.get_object(Bucket=BUCKET_NAME, Key=OBJECT_NAME)
    # BytesIO exposes the downloaded bytes as an in-memory file for read_csv
    df = pd.read_csv(BytesIO(response["Body"].read()))
except Exception as e:
    print(f"Erro ao carregar o dataset do MinIO: {e}")
    # Re-raise: every later cell needs `df`, so continuing after a failed load
    # would only surface as a confusing NameError (or silently reuse a stale
    # `df` left in the kernel by a previous run).
    raise
else:
    print("Dataset carregado com sucesso do MinIO!")

Dataset carregado com sucesso do MinIO!


3. Inspeção Inicial do DataFrame

In [5]:
# Preview: first five rows of the dataset
print("Primeiras 5 linhas do dataset:")
head_rows = df.head()
display(head_rows)

# Structure overview printed by DataFrame.info()
print("\nInformações gerais do DataFrame:")
df.info()

# Summary statistics from DataFrame.describe()
print("\nEstatísticas Descritivas:")
summary_stats = df.describe()
display(summary_stats)

# Dataset dimensions, unpacked from shape as (rows, columns)
n_rows, n_cols = df.shape
print(f"\nO dataset possui {n_rows} linhas e {n_cols} colunas.")

Primeiras 5 linhas do dataset:


Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,14,20,0,11,2,1,2,4,2,3,...,2,3,2,3,3,2,3,3,2,1
1,15,8,1,15,5,3,1,4,3,1,...,2,1,4,1,5,1,4,5,5,2
2,12,18,1,14,2,1,2,2,2,2,...,2,2,3,3,2,2,3,2,2,1
3,16,12,1,15,4,3,1,3,4,2,...,2,2,4,1,4,1,4,4,5,2
4,16,28,0,7,2,3,5,1,3,2,...,3,4,3,1,2,1,5,0,5,1



Informações gerais do DataFrame:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1100 entries, 0 to 1099
Data columns (total 21 columns):
 #   Column                        Non-Null Count  Dtype
---  ------                        --------------  -----
 0   anxiety_level                 1100 non-null   int64
 1   self_esteem                   1100 non-null   int64
 2   mental_health_history         1100 non-null   int64
 3   depression                    1100 non-null   int64
 4   headache                      1100 non-null   int64
 5   blood_pressure                1100 non-null   int64
 6   sleep_quality                 1100 non-null   int64
 7   breathing_problem             1100 non-null   int64
 8   noise_level                   1100 non-null   int64
 9   living_conditions             1100 non-null   int64
 10  safety                        1100 non-null   int64
 11  basic_needs                   1100 non-null   int64
 12  academic_performance          1100 non-null   int64
 13 

Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
count,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,...,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0,1100.0
mean,11.063636,17.777273,0.492727,12.555455,2.508182,2.181818,2.66,2.753636,2.649091,2.518182,...,2.772727,2.772727,2.621818,2.648182,2.649091,1.881818,2.734545,2.767273,2.617273,0.996364
std,6.117558,8.944599,0.500175,7.727008,1.409356,0.833575,1.548383,1.400713,1.328127,1.119208,...,1.433761,1.414594,1.315781,1.384579,1.529375,1.047826,1.425265,1.417562,1.530958,0.821673
min,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,6.0,11.0,0.0,6.0,1.0,1.0,1.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,1.0,0.0
50%,11.0,19.0,0.0,12.0,3.0,2.0,2.5,3.0,3.0,2.0,...,3.0,2.0,2.0,2.0,2.0,2.0,2.0,2.5,3.0,1.0
75%,16.0,26.0,1.0,19.0,3.0,3.0,4.0,4.0,3.0,3.0,...,4.0,4.0,3.0,4.0,4.0,3.0,4.0,4.0,4.0,2.0
max,21.0,30.0,1.0,27.0,5.0,3.0,5.0,5.0,5.0,5.0,...,5.0,5.0,5.0,5.0,5.0,3.0,5.0,5.0,5.0,2.0



O dataset possui 1100 linhas e 21 colunas.
