# **Telecom** X - Análisis de Evasión de **Clientes**

## Importación de librerías

In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
import os


# 📌 Extracción

In [30]:
# Source: raw JSON file (TelecomX_Data.json) hosted on GitHub.
url = 'https://raw.githubusercontent.com/ingridcristh/challenge2-data-science-LATAM/refs/heads/main/TelecomX_Data.json'

# Bounded wait so a stalled connection cannot hang the notebook indefinitely.
response = requests.get(url, timeout=30)
# Fail here with a clear HTTPError instead of a confusing JSON decode error further down.
response.raise_for_status()
data = response.json()

In [31]:
# Build a DataFrame from the parsed JSON payload and inspect the inferred column dtypes.
df = pd.DataFrame(data)
df.dtypes

Unnamed: 0,0
customerID,object
Churn,object
customer,object
phone,object
internet,object
account,object


In [32]:
# Preview the first rows of the raw frame (the recorded output shows that
# customer/phone/internet/account still hold nested dicts at this point).
df.head()

Unnamed: 0,customerID,Churn,customer,phone,internet,account
0,0002-ORFBO,No,"{'gender': 'Female', 'SeniorCitizen': 0, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'One year', 'PaperlessBilling': '..."
1,0003-MKNFE,No,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'Yes'}","{'InternetService': 'DSL', 'OnlineSecurity': '...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
2,0004-TLHLJ,Yes,"{'gender': 'Male', 'SeniorCitizen': 0, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
3,0011-IGKFF,Yes,"{'gender': 'Male', 'SeniorCitizen': 1, 'Partne...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."
4,0013-EXCHZ,Yes,"{'gender': 'Female', 'SeniorCitizen': 1, 'Part...","{'PhoneService': 'Yes', 'MultipleLines': 'No'}","{'InternetService': 'Fiber optic', 'OnlineSecu...","{'Contract': 'Month-to-month', 'PaperlessBilli..."


# 🔧 Transformación

In [22]:
# NOTE(review): this cell's execution count (22) is older than its neighbours, and the
# recorded output lists the flattened column names that are only created by the
# normalization cell below. On a fresh Restart & Run All it would print the six raw
# column names instead. Consider moving it after the normalization cell.
print(df.columns.tolist())

['customerid', 'churn', 'customer_gender', 'customer_seniorcitizen', 'customer_partner', 'customer_dependents', 'customer_tenure', 'phone_phoneservice', 'phone_multiplelines', 'internet_internetservice', 'internet_onlinesecurity', 'internet_onlinebackup', 'internet_deviceprotection', 'internet_techsupport', 'internet_streamingtv', 'internet_streamingmovies', 'account_contract', 'account_paperlessbilling', 'account_paymentmethod', 'account_charges.monthly', 'account_charges.total']


In [33]:
# Expand each dict-valued column into one flat column per key, prefixed with the
# parent column name (e.g. customer -> customer_gender, customer_tenure, ...).
nestedData_col = ['customer', 'phone', 'internet', 'account']

# Only process columns that are still present, so re-running this cell is a no-op
# instead of raising KeyError on columns that were already expanded and dropped.
pending_cols = [col for col in nestedData_col if col in df.columns]

flattened_parts = []
for col in pending_cols:
    nestedData = pd.json_normalize(df[col])
    # Align explicitly with df so the column-wise concat cannot misalign rows.
    nestedData.index = df.index
    nestedData.columns = [f"{col}_{subcol}".lower() for subcol in nestedData.columns]
    flattened_parts.append(nestedData)

# Single concat: remaining top-level columns first, then each expanded group in order.
df = pd.concat([df.drop(columns=pending_cols), *flattened_parts], axis=1)

# Normalize every label to lowercase with underscores instead of spaces.
df.columns = df.columns.str.replace(' ', '_').str.lower()

In [34]:
# Preview the frame after the nested columns were expanded.
df.head()

Unnamed: 0,customerid,churn,customer_gender,customer_seniorcitizen,customer_partner,customer_dependents,customer_tenure,phone_phoneservice,phone_multiplelines,internet_internetservice,...,internet_onlinebackup,internet_deviceprotection,internet_techsupport,internet_streamingtv,internet_streamingmovies,account_contract,account_paperlessbilling,account_paymentmethod,account_charges.monthly,account_charges.total
0,0002-ORFBO,No,Female,0,Yes,Yes,9,Yes,No,DSL,...,Yes,No,Yes,Yes,No,One year,Yes,Mailed check,65.6,593.3
1,0003-MKNFE,No,Male,0,No,No,9,Yes,Yes,DSL,...,No,No,No,No,Yes,Month-to-month,No,Mailed check,59.9,542.4
2,0004-TLHLJ,Yes,Male,0,No,No,4,Yes,No,Fiber optic,...,No,Yes,No,No,No,Month-to-month,Yes,Electronic check,73.9,280.85
3,0011-IGKFF,Yes,Male,1,Yes,No,13,Yes,No,Fiber optic,...,Yes,Yes,No,Yes,Yes,Month-to-month,Yes,Electronic check,98.0,1237.85
4,0013-EXCHZ,Yes,Female,1,Yes,No,3,Yes,No,Fiber optic,...,No,No,Yes,Yes,No,Month-to-month,Yes,Mailed check,83.9,267.4


In [37]:
# Persist a snapshot of the flattened frame to disk.
# The folder is defined once and the file path is derived from it, so the two cannot drift apart.
carpeta_backup = '../content/backup/'
ruta_backup = os.path.join(carpeta_backup, 'telecomx_datos.csv')

# isdir (rather than exists) so a stray file at that path is not mistaken for the folder;
# exist_ok=True avoids a failure if the folder appears between the check and the creation.
carpeta_ya_existia = os.path.isdir(carpeta_backup)
os.makedirs(carpeta_backup, exist_ok=True)
if carpeta_ya_existia:
    print(f"Carpeta '{carpeta_backup}' ya existe.")
else:
    print(f"Carpeta '{carpeta_backup}' creada.")

# Independent in-memory copy, taken before writing to disk.
df_backup = df.copy()
print("Backup en memoria creado.")

# index=False: the index is not written as a column of the CSV.
df_backup.to_csv(ruta_backup, index=False)
print(f"Backup guardado correctamente en '{ruta_backup}'.")

Carpeta '../content/backup/' ya existe.
Backup en memoria creado.
Backup guardado correctamente en '../content/backup/telecomx_datos.csv'.


In [38]:
# Reload the CSV snapshot written by the backup cell (same path); read_csv infers
# the column dtypes again from the text.
df_data = pd.read_csv('../content/backup/telecomx_datos.csv')

In [39]:
# Report the dimensions of the reloaded dataset (rows, columns).
filas, columnas = df_data.shape

print("Total de filas: {}, \nTotal de columnas: {}".format(filas, columnas))

Total de filas: 7267, 
Total de columnas: 21


In [40]:
# List the column labels of the reloaded frame.
df_data.columns

Index(['customerid', 'churn', 'customer_gender', 'customer_seniorcitizen',
       'customer_partner', 'customer_dependents', 'customer_tenure',
       'phone_phoneservice', 'phone_multiplelines', 'internet_internetservice',
       'internet_onlinesecurity', 'internet_onlinebackup',
       'internet_deviceprotection', 'internet_techsupport',
       'internet_streamingtv', 'internet_streamingmovies', 'account_contract',
       'account_paperlessbilling', 'account_paymentmethod',
       'account_charges.monthly', 'account_charges.total'],
      dtype='object')

In [41]:
# Work on a copy so the cleaning steps leave df_data untouched.
df_limpiar_datos = df_data.copy()

In [42]:
# Relabel the churn flag with Spanish categories.
# Series.replace (instead of Series.map) only touches the listed keys: values outside
# the mapping are kept rather than silently turned into NaN, and re-running the cell
# is a no-op instead of wiping the already-translated labels.
if 'churn' in df_limpiar_datos.columns:
    df_limpiar_datos['churn'] = df_limpiar_datos['churn'].replace({'Yes': 'Canceló', 'No': 'Retenido'})
    print("Cambios realizados.")
else:
    print("Atención: Columna 'churn' no encontrada.")

Cambios realizados.


# 📊 Carga y análisis

# 📄 Informe final