In [1]:
import pandas as pd

# Build the sample complaint dataset.
# Each tuple below is one ticket; the values follow the order of `ticket_columns`.
ticket_columns = [
    "Ticket#", "Customer Name", "City", "State",
    "Zipcode", "Complaint Type", "Status", "Received Via",
]
ticket_rows = [
    (10001, "John Doe", "Atlanta", "Georgia", 30301, "Internet Issue", "Open", "Internet"),
    (10002, "Alice Smith", "Houston", "Texas", 77001, "Billing Issue", "Closed", "Customer Care Call"),
    (10003, "Bob Johnson", "Miami", "Florida", 33101, "Slow Speed", "Open", "Internet"),
    (10004, "Carol White", "New York", "New York", 10001, "Service Down", "Closed", "Customer Care Call"),
    (10005, "David Brown", "Chicago", "Illinois", 60601, "Connectivity", "Open", "Internet"),
    (10006, "Emma Davis", "Atlanta", "Georgia", 30302, "No Signal", "Open", "Customer Care Call"),
    (10007, "Frank Clark", "Dallas", "Texas", 75001, "Billing Issue", "Closed", "Internet"),
    (10008, "Grace Hall", "Miami", "Florida", 33102, "Slow Speed", "Open", "Customer Care Call"),
    (10009, "Henry Scott", "Chicago", "Illinois", 60602, "Service Down", "Closed", "Internet"),
    (10010, "Irene Moore", "Houston", "Texas", 77002, "Connectivity", "Open", "Customer Care Call"),
]

# Transpose the row tuples into the column-oriented mapping {column name: list of values}.
data = {
    column: list(values)
    for column, values in zip(ticket_columns, zip(*ticket_rows))
}

# Materialise the mapping as a DataFrame (one row per ticket).
df = pd.DataFrame(data)

# Persist the table to disk without the positional index.
df.to_csv("comcast_telecom.csv", index=False)

# Show the complete table.
print(df)

   Ticket# Customer Name      City     State  Zipcode  Complaint Type  Status  \
0    10001      John Doe   Atlanta   Georgia    30301  Internet Issue    Open   
1    10002   Alice Smith   Houston     Texas    77001   Billing Issue  Closed   
2    10003   Bob Johnson     Miami   Florida    33101      Slow Speed    Open   
3    10004   Carol White  New York  New York    10001    Service Down  Closed   
4    10005   David Brown   Chicago  Illinois    60601    Connectivity    Open   
5    10006    Emma Davis   Atlanta   Georgia    30302       No Signal    Open   
6    10007   Frank Clark    Dallas     Texas    75001   Billing Issue  Closed   
7    10008    Grace Hall     Miami   Florida    33102      Slow Speed    Open   
8    10009   Henry Scott   Chicago  Illinois    60602    Service Down  Closed   
9    10010   Irene Moore   Houston     Texas    77002    Connectivity    Open   

         Received Via  
0            Internet  
1  Customer Care Call  
2            Internet  
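3  Customer Care Call  
4            Internet  
5  Customer Care Call  
6            Internet  
7  Customer Care Call  
8            Internet  
9  Customer Care Call  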

In [2]:

# 1. Report the table dimensions; `df.shape` is a (row count, column count) tuple
dimensions = df.shape
print("\nNumber of Records and Columns:")
print(dimensions)


Number of Records and Columns:
(10, 8)


In [3]:
# 2. Separate numerical and categorical data types
# Numeric columns are selected with the generic 'number' selector.
# Text-like columns are matched under 'object' (classic object-backed strings),
# 'string' (pandas' dedicated string dtype) and 'category', so the split does
# not depend on which storage pandas chose for the text columns.
numerical_cols = df.select_dtypes(include=['number']).columns
categorical_cols = df.select_dtypes(include=['object', 'string', 'category']).columns
print("\nNumerical Columns:", list(numerical_cols))
print("Categorical Columns:", list(categorical_cols))



Numerical Columns: ['Ticket#', 'Zipcode']
Categorical Columns: ['Customer Name', 'City', 'State', 'Complaint Type', 'Status', 'Received Via']


In [4]:
# 3. Tally how many complaints fall under each status value
status_counts = df["Status"].value_counts()
print("\nNumber of Complaints as per Status:")
print(status_counts)


Number of Complaints as per Status:
Status
Open      6
Closed    4
Name: count, dtype: int64


In [5]:
# 4. Show only the complaints whose channel is exactly "Internet"
via_internet = df["Received Via"] == "Internet"  # boolean mask, one entry per row
print("\nComplaints received via 'Internet':")
print(df[via_internet])


Complaints received via 'Internet':
   Ticket# Customer Name     City     State  Zipcode  Complaint Type  Status  \
0    10001      John Doe  Atlanta   Georgia    30301  Internet Issue    Open   
2    10003   Bob Johnson    Miami   Florida    33101      Slow Speed    Open   
4    10005   David Brown  Chicago  Illinois    60601    Connectivity    Open   
6    10007   Frank Clark   Dallas     Texas    75001   Billing Issue  Closed   
8    10009   Henry Scott  Chicago  Illinois    60602    Service Down  Closed   

  Received Via  
0     Internet  
2     Internet  
4     Internet  
6     Internet  
8     Internet  


In [6]:
# 5. Find the state with the most complaints
complaints_per_state = df["State"].value_counts()
# idxmax() returns the index label (the state name) holding the largest count.
busiest_state = complaints_per_state.idxmax()
print("\nState with Maximum Complaints:")
print(busiest_state)



State with Maximum Complaints:
Texas


In [7]:
# 6. Show Atlanta customers whose complaint arrived through "Customer Care Call"
in_atlanta = df["City"] == "Atlanta"
via_care_call = df["Received Via"] == "Customer Care Call"
print("\nCustomers from Atlanta with Complaints via 'Customer Care Call':")
# Both conditions must hold, so the two boolean masks are combined element-wise with &.
print(df[in_atlanta & via_care_call])


Customers from Atlanta with Complaints via 'Customer Care Call':
   Ticket# Customer Name     City    State  Zipcode Complaint Type Status  \
5    10006    Emma Davis  Atlanta  Georgia    30302      No Signal   Open   

         Received Via  
5  Customer Care Call  


In [8]:
# 7. Drop the "Ticket#" column
# This cell rebinds `df`, so running it a second time would look for a column
# that is already gone. errors="ignore" makes drop() skip labels that are not
# present instead of raising KeyError, which keeps the cell safe to re-run.
df = df.drop(columns=["Ticket#"], errors="ignore")
print("\n'Ticket#' column dropped.")



'Ticket#' column dropped.


In [9]:
# 8. Count complaints for every (Zipcode, Status) pair
by_zip_and_status = df.groupby(["Zipcode", "Status"])
# size() yields the number of rows in each group as a Series indexed by the group keys.
complaints_per_zip_status = by_zip_and_status.size()
print("\nNumber of Open and Closed Complaints per Zipcode:")
print(complaints_per_zip_status)


Number of Open and Closed Complaints per Zipcode:
Zipcode  Status
10001    Closed    1
30301    Open      1
30302    Open      1
33101    Open      1
33102    Open      1
60601    Open      1
60602    Closed    1
75001    Closed    1
77001    Closed    1
77002    Open      1
dtype: int64


In [10]:
# 9. List the column labels currently present in the DataFrame
column_labels = df.columns
print("\nColumn Names:")
print(column_labels)


Column Names:
Index(['Customer Name', 'City', 'State', 'Zipcode', 'Complaint Type', 'Status',
       'Received Via'],
      dtype='object')


In [11]:
# 10. Show the second and third columns, selected by position
# Positions are zero-based, so the second and third columns sit at 1 and 2.
second_and_third = df.iloc[:, [1, 2]]
print("\nSecond and Third Columns:")
print(second_and_third)


Second and Third Columns:
       City     State
0   Atlanta   Georgia
1   Houston     Texas
2     Miami   Florida
3  New York  New York
4   Chicago  Illinois
5   Atlanta   Georgia
6    Dallas     Texas
7     Miami   Florida
8   Chicago  Illinois
9   Houston     Texas
