In [10]:
import numpy as np

# Seed NumPy's global RNG so that "Restart Kernel -> Run All" regenerates the
# exact same synthetic dataset; any later cell drawing from `np.random` in
# execution order becomes reproducible as well.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Grid dimensions: one row per location, one column per day of the year.
N_LOCATIONS, N_DAYS = 500, 365

# Generate temperature data (uniform in [-10, 40) degrees Celsius) and humidity
# data (uniform in [0, 100) percent) for every location/day pair.
temperature_data = np.random.uniform(-10, 40, (N_LOCATIONS, N_DAYS))
humidity_data = np.random.uniform(0, 100, (N_LOCATIONS, N_DAYS))


In [2]:
# Simulate 5% missing data by overwriting randomly chosen cells with NaN.
# The count is derived from the temperature array and reused for humidity.
num_missing = int(0.05 * temperature_data.size)

# Draw distinct flat positions (replace=False) separately for each array.
# Assign through `.flat` instead of `.ravel()`: `ravel()` returns a view only
# when no copy is needed, so on a non-contiguous array the NaNs would be written
# to a temporary copy and silently discarded. `.flat` always writes into the
# array itself.
temperature_data.flat[np.random.choice(temperature_data.size, num_missing, replace=False)] = np.nan
humidity_data.flat[np.random.choice(humidity_data.size, num_missing, replace=False)] = np.nan

# Count missing values to confirm the injection took effect.
temp_missing_count = np.isnan(temperature_data).sum()
humidity_missing_count = np.isnan(humidity_data).sum()
print(f"Missing temperature data: {temp_missing_count}, Missing humidity data: {humidity_missing_count}")


Missing temperature data: 9125, Missing humidity data: 9125


In [3]:
# Celsius -> Fahrenheit: scale by 9/5, then offset by 32.
temperature_data_fahrenheit = (temperature_data * 9) / 5 + 32

# "Feels like" discomfort index built from the Fahrenheit temperature and the
# humidity expressed as a fraction (percent / 100).
discomfort_index = (
    0.72 * (temperature_data_fahrenheit + 95 * (humidity_data / 100))
    + 40.6
)

# Cap the index at 80, in place. NaN entries stay NaN.
np.minimum(discomfort_index, 80, out=discomfort_index)


In [4]:
# January is the first 31 day-columns. The mean below pools every location and
# every January day into a single scalar, skipping NaN entries.
january_temps = temperature_data[:, 0:31]
january_avg_temp = np.nanmean(january_temps, axis=None)
print(f"Average January temperature: {january_avg_temp:.2f} °C")


Average January temperature: 15.08 °C


In [5]:
# Treat readings above 35°C as invalid: overwrite them with NaN in place, so
# `temperature_data` itself (and any view onto it) is modified.
np.putmask(temperature_data, temperature_data > 35, np.nan)

# Tally NaN entries along the day axis, giving one count per location (row).
null_count_per_location = np.sum(np.isnan(temperature_data), axis=1)
print("Null values per location after marking extreme temperatures:", null_count_per_location)


Null values per location after marking extreme temperatures: [47 44 50 60 51 45 56 57 50 45 46 51 53 43 60 51 49 45 59 50 57 47 55 53
 61 56 40 50 42 57 50 56 59 48 62 47 53 62 53 56 56 59 55 45 56 53 54 49
 59 56 47 50 46 48 51 43 56 74 64 47 62 61 51 51 60 46 55 56 46 61 54 52
 55 47 49 52 50 43 57 46 62 61 52 52 52 51 62 59 54 51 64 50 37 54 57 72
 51 62 61 60 59 55 51 45 53 51 56 58 59 59 56 54 55 60 64 44 61 57 46 51
 51 53 50 62 46 42 51 53 48 45 46 51 56 47 46 48 56 45 48 46 63 50 55 54
 57 48 50 64 42 51 47 54 55 56 52 49 46 70 45 58 71 50 50 60 54 58 51 55
 51 61 53 50 45 58 45 53 54 44 59 53 55 53 56 51 58 39 58 39 48 50 61 47
 52 50 51 55 51 48 54 43 50 46 44 60 52 57 57 58 59 44 56 49 63 45 48 59
 58 47 42 64 57 54 57 55 54 46 48 45 38 46 46 42 53 57 48 54 44 62 52 59
 49 46 56 51 55 45 57 49 51 55 50 35 59 52 61 60 40 64 49 62 49 56 56 60
 60 51 47 45 52 48 57 53 55 48 50 41 57 64 64 62 49 52 52 44 46 39 53 52
 53 52 65 37 54 54 60 68 68 49 46 61 64 53 50 50 48 45 53 65 52

In [9]:
# Cut the day axis at columns 90, 181 and 273, producing four quarters of
# 90, 91, 92 and (remaining) day-columns respectively.
q1, q2, q3, q4 = np.split(temperature_data, [90, 181, 273], axis=1)

# NaN-ignoring mean per location (row) within each quarter.
q1_avg, q2_avg, q3_avg, q4_avg = (
    np.nanmean(quarter, axis=1) for quarter in (q1, q2, q3, q4)
)

# Stack the four per-quarter vectors as rows, then transpose so that each row
# is a location and each column a quarter.
quarterly_averages = np.transpose(np.vstack([q1_avg, q2_avg, q3_avg, q4_avg]))
print("Quarterly temperature averages:", quarterly_averages)


Quarterly temperature averages: [[11.52827513 14.5668776  13.82253218 12.55296073]
 [12.45376924 12.58995502 10.88238869 12.32447205]
 [10.71611707 13.81226372 12.46435115 13.98294177]
 ...
 [11.06900537 12.82374481 10.80016636 11.98477833]
 [13.93028017  9.0122533   9.40092858 13.02190556]
 [10.92782154 12.52489872 10.39702189 13.26546154]]


In [7]:
# Per-location counts of days below 30 ("dry") and above 70 ("humid").
# Comparisons against NaN are False, so missing readings count as neither.
dry_days = np.sum(humidity_data < 30, axis=1)
humid_days = np.sum(humidity_data > 70, axis=1)
print("Total 'Dry' and 'Humid' days per location:", dry_days, humid_days)


Total 'Dry' and 'Humid' days per location: [ 96 108 107 100 109 108 104 109 105 107  97 101 106  86 113 108  83 113
 108  95 103 108 102 112 107 124 109 100 121  99 112 112 104 100 117  96
 102  94 122  94 112 111 101 100  91 103 109 108  97 103  97 114  89 106
 104 112 112 117 104 103 104  97 102 109  97  92  95 107 102 105 107 100
  97 102 100 101 117 102 105  96 109  98 100  83 101 108 111 111 113  87
 100 119 105 110 110 108 110  96  91 101 120  94 122  98  91 110 111 103
 103 114 105  96  93 109  99  80  93  93  98  96 111 110 104 108 110 108
 120 103 106 114 108  98  97  94  99 101  95 103 101 109 102 107 115 105
 105  90 102 105 110 103  97 114 109 102  97  94 101 110  93 126  98 109
  92 119 109 112 100  97 102 107 107 115 112 107  95 100 104 114 108 109
 100 102  97 103 103 108  85 104  98 109 120 106 110 105 104 106 108  96
 115  98 114 104 111 106 100  98 108 107 116 105 107 114  92  98 109 101
 110 107 112 113 103  97  97 123 101 103 105 109 109 103  99 107 111 108
 110  90

In [8]:
# Example pressure variation: one full sine period sampled at 365 points
# (endpoints included), scaled to an amplitude of 5.
daily_pressure_trend = 5 * np.sin(np.linspace(0, 2 * np.pi, num=365))

# Add the per-day trend to every row; the 1-D trend broadcasts along the last
# axis of `temperature_data`, which must therefore have length 365.
temperature_data_adjusted = np.add(temperature_data, daily_pressure_trend)
