Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 182 additions & 0 deletions LAB2_WEEK17_DAY3.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Location of the raw CSV. Set the LEARNINGSET_CSV environment variable to point at your own\n",
"# copy of the file; when it is unset, the original hard-coded local path is used as a fallback.\n",
"DATA_PATH = os.environ.get(\"LEARNINGSET_CSV\", r\"C:\\Users\\Win10\\Downloads\\learningSet.csv\")\n",
"\n",
"# low_memory=False disables chunked parsing, which can otherwise infer mixed dtypes per column.\n",
"df = pd.read_csv(DATA_PATH, low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Valores nulos en las columnas numéricas:\n",
"AGE 23665\n",
"NUMCHLD 83026\n",
"INCOME 21286\n",
"WEALTH1 44732\n",
"MBCRAFT 52854\n",
" ... \n",
"RAMNT_23 87553\n",
"RAMNT_24 77674\n",
"NEXTDATE 9973\n",
"TIMELAG 9973\n",
"CLUSTER2 132\n",
"Length: 91, dtype: int64\n"
]
}
],
"source": [
"# Report how many values are missing in each numeric column; columns without gaps are omitted.\n",
"numeric_columns = df.select_dtypes(include='number').columns\n",
"null_counts_numeric = df.loc[:, numeric_columns].isna().sum()\n",
"print(\"Valores nulos en las columnas numéricas:\")\n",
"print(null_counts_numeric.loc[null_counts_numeric.gt(0)])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Use appropriate methods to clean the GEOCODE2, WEALTH1, ADI, DMA, and MSA columns"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# 1. GEOCODE2: treat missing values as their own category, labelled 'N/A'.\n",
"# NOTE(review): pandas.read_csv parses the text 'N/A' as NaN by default, so this label will not\n",
"# survive a to_csv/read_csv round trip unless keep_default_na=False is used when reloading.\n",
"df.loc[df['GEOCODE2'].isna(), 'GEOCODE2'] = 'N/A'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# 2. WEALTH1: fill missing values with a central value; the median is used here (the mode\n",
"# would be the alternative).\n",
"df['WEALTH1'] = df['WEALTH1'].pipe(lambda col: col.fillna(col.median()))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# 3. ADI: fill missing values with the column median.\n",
"# NOTE(review): if ADI is an area code rather than a quantity, the median is not a meaningful\n",
"# fill value (the mode or a sentinel would be safer) — confirm against the data dictionary.\n",
"df['ADI'] = df['ADI'].pipe(lambda col: col.fillna(col.median()))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# 4. DMA: fill missing values with the column median.\n",
"# NOTE(review): if DMA is a market-area code rather than a quantity, the median is not a\n",
"# meaningful fill value (the mode or a sentinel would be safer) — confirm against the data dictionary.\n",
"df['DMA'] = df['DMA'].pipe(lambda col: col.fillna(col.median()))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# 5. MSA: fill missing values with the column median.\n",
"# NOTE(review): if MSA is a metropolitan-area code rather than a quantity, the median is not a\n",
"# meaningful fill value (the mode or a sentinel would be safer) — confirm against the data dictionary.\n",
"df['MSA'] = df['MSA'].pipe(lambda col: col.fillna(col.median()))"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Valores nulos después de la limpieza:\n",
"GEOCODE2 0\n",
"WEALTH1 0\n",
"ADI 0\n",
"DMA 0\n",
"MSA 0\n",
"dtype: int64\n"
]
}
],
"source": [
"# Re-count missing values in the five cleaned columns to confirm the fills took effect.\n",
"print(\"\\nValores nulos después de la limpieza:\")\n",
"print(df.loc[:, ['GEOCODE2', 'WEALTH1', 'ADI', 'DMA', 'MSA']].isna().sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}