In [None]:
import pandas as pd
import numpy as np

# Create DataFrame from a NumPy array.
# dtype=object lets every cell hold an arbitrary Python object, so one array
# can mix strings, two-element lists and floats.
data_np = np.array(
    [
        ['Blue', [1, 2], 1.1],
        ['Red', [3, 4], 2.2],
        ['Pink', [5, 6], 3.3],
        ['Grey', [7, 8], 4.4],
        ['Black', [9, 10], 5.5],
    ],
    dtype=object,
)

# Label the rows with the odd integers 1..9 and name the three columns.
df_from_numpy = pd.DataFrame(
    data=data_np,
    columns=['color', 'list', 'number'],
    index=[1, 3, 5, 7, 9],
)

print("DataFrame from NumPy array:")
print(df_from_numpy)

# Create DataFrame from Pandas Series.
# Each column is built as its own Series carrying the same odd-integer index,
# so pandas picks a dtype per column instead of one dtype for the whole table.
data_series = {
    column_name: pd.Series(column_values, index=[1, 3, 5, 7, 9])
    for column_name, column_values in (
        ('color', ['Blue', 'Red', 'Pink', 'Grey', 'Black']),
        ('list', [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]),
        ('number', [1.1, 2.2, 3.3, 4.4, 5.5]),
    )
}

df_from_series = pd.DataFrame(data_series)

print("DataFrame from Pandas Series:")
print(df_from_series)

# Report the dtype pandas assigned to each column of the NumPy-built frame.
print("Column types:", df_from_numpy.dtypes, sep="\n")

# Show the Python type of the first cell in every column; iterating a
# DataFrame directly yields its column labels.
print("Types of the first value of every column:")
for col in df_from_numpy:
    print(f"Column '{col}': {type(df_from_numpy[col].iloc[0])}")

In [None]:
# --- Data Loading, Cleaning, and Transformation ---

# 1. Load the dataset
print("Step 1: Loading data...")
file_path = 'household_power_consumption_2.txt'
# One chained expression: parse the semicolon-separated file (treating '?' as
# missing), discard the unused columns, and key the rows by the 'Date' column.
df = (
    pd.read_csv(file_path, sep=';', na_values=['?'])
    .drop(columns=['Time', 'Sub_metering_2', 'Sub_metering_3'])
    .set_index('Date')
)

# 2. Update data types
print("Step 2: Updating data types...")
def update_types(df_to_update):
    """Return a copy of the frame with numeric columns and a datetime index.

    Every column is passed through ``pd.to_numeric`` with ``errors='coerce'``,
    so values that cannot be parsed become NaN. The index is parsed with
    ``pd.to_datetime`` using the day/month/year format ``'%d/%m/%Y'``.

    The input frame is left untouched: conversion happens on a copy, so the
    caller's original data stays available and the returned object is not an
    alias of the argument.

    Parameters
    ----------
    df_to_update : pandas.DataFrame
        Frame whose index holds date strings formatted as ``dd/mm/YYYY``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the converted columns and a ``DatetimeIndex``.

    Raises
    ------
    ValueError
        Propagated from ``pd.to_datetime`` when an index label does not match
        the expected format.
    """
    # Work on a copy so the caller's frame is never modified as a side effect.
    converted = df_to_update.copy()
    for col in converted.columns:
        converted[col] = pd.to_numeric(converted[col], errors='coerce')
    converted.index = pd.to_datetime(converted.index, format='%d/%m/%Y')
    return converted
df_updated = update_types(df)

# A bare expression in the middle of a cell is evaluated and then discarded,
# so the summary statistics have to be printed explicitly to be visible.
print(df_updated.describe())

# 3. Drop rows with missing values and create a copy
print("Step 3: Dropping missing values...")
# dropna() removes every row that contains at least one NaN; the explicit
# copy gives the column assignment below an independent frame to write to.
df_cleaned = df_updated.dropna().copy()

print("--- Sub_metering_1 before modification ---")
print(df_cleaned['Sub_metering_1'].head())

# 4. Apply the transformation
print("Step 4: Applying transformation...")
# Add 1 to every Sub_metering_1 value, then scale the result by 0.06.
# NOTE(review): the constants 1 and 0.06 presumably come from the task
# statement — confirm their meaning and units.
df_cleaned['Sub_metering_1'] = (df_cleaned['Sub_metering_1'] + 1) * 0.06

print("--- Sub_metering_1 after modification ---")
print(df_cleaned['Sub_metering_1'].head())

# 1. Select rows where Date >= 2008-12-27 and Voltage >= 242
print("--- 1. Filtering Data ---")
# Combine both conditions into one boolean mask: the voltage threshold and
# the index date falling on or after the cutoff.
filtered_df = df_cleaned[
    (df_cleaned['Voltage'] >= 242) & (df_cleaned.index >= '2008-12-27')
]
print(f"Found {len(filtered_df)} rows matching the criteria.")

print("\n--- 2. 88888th Row of Filtered Data ---")
# Guard first: position 88888 only exists when the frame has more rows than that.
# NOTE(review): iloc is zero-based, so position 88888 is the 88889th row when
# counting from one — confirm which row the task intends.
if len(filtered_df) <= 88888:
    print("There are not enough rows in the filtered data to select the 88888th row.")
else:
    print(filtered_df.iloc[88888])


# 3. Find the date of the maximum Global_active_power
print("\n--- 3. Date of Maximum Global Active Power ---")
# idxmax() returns the index label (a timestamp here) of the largest value.
max_power_date = df_cleaned.loc[:, 'Global_active_power'].idxmax()
print(f"The Global_active_power was maximal on: {max_power_date.date()}")


# 4. Sort by Global_active_power (descending), ties broken by Voltage (ascending)
print("\n--- 4. Sorted DataFrame (First 3 Columns) ---")
sorted_df = df_cleaned.sort_values(
    by=['Global_active_power', 'Voltage'],
    ascending=[False, True],
)
# Show only the first five rows of the first three columns of the sorted frame.
print(sorted_df.iloc[:5, :3])


# 5. Compute the daily average of Global_active_power
print("\n--- 5. Daily Average of Global Active Power ---")
# Bucket the rows by calendar day, then average the selected column per bucket.
daily_avg_power = df_cleaned.resample('D')['Global_active_power'].mean()
print(daily_avg_power.iloc[:5])

