From e7bbfc70dcc3dc9c4af9f359c3942484f67c53cf Mon Sep 17 00:00:00 2001 From: MARIA240188 <139810107+MARIA240188@users.noreply.github.com> Date: Tue, 2 Jul 2024 21:22:32 +0200 Subject: [PATCH] Add files via upload --- LAB2_WEEK17_DAY3.ipynb | 182 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 LAB2_WEEK17_DAY3.ipynb diff --git a/LAB2_WEEK17_DAY3.ipynb b/LAB2_WEEK17_DAY3.ipynb new file mode 100644 index 0000000..b3482a7 --- /dev/null +++ b/LAB2_WEEK17_DAY3.ipynb @@ -0,0 +1,182 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(r\"C:\\Users\\Win10\\Downloads\\learningSet.csv\",low_memory=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Valores nulos en las columnas numéricas:\n", + "AGE 23665\n", + "NUMCHLD 83026\n", + "INCOME 21286\n", + "WEALTH1 44732\n", + "MBCRAFT 52854\n", + " ... \n", + "RAMNT_23 87553\n", + "RAMNT_24 77674\n", + "NEXTDATE 9973\n", + "TIMELAG 9973\n", + "CLUSTER2 132\n", + "Length: 91, dtype: int64\n" + ] + } + ], + "source": [ + "# Comprobar si hay valores nulos en las columnas numéricas\n", + "numeric_columns = df.select_dtypes(include=['number']).columns\n", + "null_counts_numeric = df[numeric_columns].isnull().sum()\n", + "print(\"Valores nulos en las columnas numéricas:\")\n", + "print(null_counts_numeric[null_counts_numeric > 0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Utilice métodos apropiados para limpiar las columnas GEOCODE2, WEALTH1, ADI, DMA,y MSA" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# 1. GEOCODE2: Asumiremos que los valores nulos pueden ser reemplazados por una categoría 'N/A'\n", + "df['GEOCODE2'] = df['GEOCODE2'].fillna('N/A')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# 2. WEALTH1: Reemplazar valores nulos con la mediana o la moda (usaremos la mediana aquí)\n", + "df['WEALTH1'] = df['WEALTH1'].fillna(df['WEALTH1'].median())" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# 3. ADI: Reemplazar valores nulos con la mediana\n", + "df['ADI'] = df['ADI'].fillna(df['ADI'].median())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# 4. DMA: Reemplazar valores nulos con la mediana\n", + "df['DMA'] = df['DMA'].fillna(df['DMA'].median())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# 5. MSA: Reemplazar valores nulos con la mediana\n", + "df['MSA'] = df['MSA'].fillna(df['MSA'].median())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Valores nulos después de la limpieza:\n", + "GEOCODE2 0\n", + "WEALTH1 0\n", + "ADI 0\n", + "DMA 0\n", + "MSA 0\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "# Verificar nuevamente los valores nulos después de la limpieza\n", + "print(\"\\nValores nulos después de la limpieza:\")\n", + "print(df[['GEOCODE2', 'WEALTH1', 'ADI', 'DMA', 'MSA']].isnull().sum())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}