In [24]:
import numpy as np

# Task 1: Initialize Temperature and Humidity Data
# -----------------------------------------------
# Builds two (num_locations, num_days) float arrays of synthetic readings:
#   temperature_data - uniform draws between -10 and 40 (degrees Celsius)
#   humidity_data    - uniform draws between 0 and 100 (percent)
num_locations = 500
num_days = 365

# Seed the global NumPy RNG so that "Restart Kernel -> Run All" regenerates
# exactly the same data. The global generator is seeded (rather than creating
# a local Generator) because later cells draw from np.random.* as well.
SEED = 42
np.random.seed(SEED)

# Randomly generate temperature data between -10°C and 40°C
temperature_data = np.random.uniform(low=-10, high=40, size=(num_locations, num_days))

# Randomly generate humidity data between 0% and 100%
humidity_data = np.random.uniform(low=0, high=100, size=(num_locations, num_days))

# Display the first few rows of each array for verification
print("Temperature Data (sample):")
print(temperature_data[:5])  # Displaying the first 5 locations for brevity

print("\nHumidity Data (sample):")
print(humidity_data[:5])  # Displaying the first 5 locations for brevity


Temperature Data (sample):
[[27.1848426  20.2757434  -1.43777161 ... 35.60512384 29.98156242
   2.86188426]
 [-7.49249367 26.73714432 -1.817015   ... 11.97502878 39.93572736
  22.94742626]
 [-3.08795681 10.72603694 25.69790966 ... 34.9305335  23.65598765
  -8.57799921]
 [ 5.67761943  6.6855502  30.46771622 ... 35.41564201 -3.20746858
   2.16627785]
 [-9.6994517   9.31333461 19.6722384  ... -9.89633975 33.71286112
  10.69533173]]

Humidity Data (sample):
[[8.00876192e+01 7.77744253e+01 3.47129992e+01 ... 7.13452315e-02
  5.87044249e+01 9.00803133e+01]
 [2.97091158e+01 7.34836964e+01 5.85774136e+01 ... 4.12293190e+01
  6.32250806e+00 6.97088095e+01]
 [9.05113177e+01 1.93376422e+01 2.92394644e+01 ... 3.18530851e+01
  3.71615113e+01 1.38398264e+00]
 [5.41911270e+01 9.76961543e+01 8.40935340e+01 ... 9.11918025e+00
  2.45246264e+01 2.20202763e+01]
 [9.75209544e+01 2.35537625e+01 4.74370436e+01 ... 9.85078403e+01
  3.90694013e+01 9.81229686e+01]]


In [26]:
# Task 2: Simulate Missing Data
# ------------------------------
# Knock out 5% of the readings in each dataset by overwriting them with NaN.
# NOTE: this cell rebinds temperature_data / humidity_data, so running it more
# than once on the same kernel adds further NaNs on top of the existing ones.
total_entries = num_locations * num_days
num_missing_values = int(0.05 * num_locations * num_days)

# Two independent draws of flat positions (no repeats within a draw).
# Temperature is drawn first, then humidity.
temperature_indices = np.random.choice(total_entries, size=num_missing_values, replace=False)
humidity_indices = np.random.choice(total_entries, size=num_missing_values, replace=False)

# Temperature: take a 1-D copy, blank the chosen positions, restore the 2-D shape
temperature_data_flat = temperature_data.flatten()
temperature_data_flat[temperature_indices] = np.nan
temperature_data = temperature_data_flat.reshape(num_locations, num_days)

# Humidity: same treatment with its own set of positions
humidity_data_flat = humidity_data.flatten()
humidity_data_flat[humidity_indices] = np.nan
humidity_data = humidity_data_flat.reshape(num_locations, num_days)

# Tally the NaN entries now present in each array
temperature_missing_count = np.sum(np.isnan(temperature_data))
humidity_missing_count = np.sum(np.isnan(humidity_data))

print("Total missing entries in temperature data:", temperature_missing_count)
print("Total missing entries in humidity data:", humidity_missing_count)

Total missing entries in temperature data: 9125
Total missing entries in humidity data: 9125


In [36]:
# Task 3: Convert Temperature and Calculate Discomfort Index
# -----------------------------------------------------------
# Celsius -> Fahrenheit. NaN readings stay NaN because NaN propagates through
# the arithmetic, so no explicit masking is required.
temperature_data_fahrenheit = temperature_data * 9/5 + 32

# "Feels like" discomfort index:
#   DI = T - 0.55 * (1 - RH/100) * (T - 58)
# with T taken from the Fahrenheit array and RH from humidity_data (percent).
# A NaN in either input yields a NaN index for that entry.
dryness_fraction = 1 - humidity_data / 100
degrees_above_58 = temperature_data_fahrenheit - 58
discomfort_index = temperature_data_fahrenheit - 0.55 * dryness_fraction * degrees_above_58

# Clamp the index at 80; NaN entries compare False and are left untouched
exceeds_cap = discomfort_index > 80
discomfort_index[exceeds_cap] = 80
print(f"Discomfort Index:\n {discomfort_index}")

Discomfort Index:
 [[78.42116926 67.21325881 39.6773434  ... 75.15509648 79.61483155
  38.28885581]
 [33.77898249 76.89988584 35.39793092 ... 54.99183128 80.
  70.75546939]
 [28.08863567 54.2762281  70.37285525 ... 80.         68.85026717
  39.03638028]
 ...
 [53.54193598 80.         72.227687   ... 74.92732202 80.
  80.        ]
 [46.36203808 80.         50.80731338 ... 42.58620109 53.92780556
  53.47144938]
 [53.45427651 80.         53.26416486 ... 31.52730201         nan
  48.34868728]]


In [40]:
# Task 4: Analyze January Temperatures
# --------------------------------------
# January is taken to be the first 31 day-columns of the temperature grid
days_in_january = 31
january_temperatures = temperature_data[:, 0:days_in_january]

# One overall mean over every location and January day; NaN entries are skipped
january_avg_temperature = np.nanmean(january_temperatures, axis=None)

print("Average January Temperature (Celsius):", january_avg_temperature)

Average January Temperature (Celsius): 14.975459118170953


In [42]:
# Task 5: Identify Extreme Temperatures
# --------------------------------------
# Readings above 35°C are treated as potential errors and blanked out.
# Work on a copy so temperature_data itself is left unchanged.
temperature_data_extreme_marked = temperature_data.copy()
is_extreme = temperature_data > 35
temperature_data_extreme_marked[is_extreme] = np.nan

# Per-location (row-wise) NaN tally. This includes entries that were already
# NaN before marking as well as the newly blanked extremes.
null_counts_per_location = np.sum(np.isnan(temperature_data_extreme_marked), axis=1)

print("Null values per location after marking extreme temperatures:", null_counts_per_location)

Null values per location after marking extreme temperatures: [47 52 50 56 60 49 58 57 59 53 61 61 41 52 46 49 53 50 47 60 64 63 47 55
 57 56 52 55 63 63 42 54 53 47 58 49 62 54 59 45 48 49 52 45 61 56 51 54
 62 48 53 55 55 61 56 49 46 52 59 45 51 66 59 45 59 43 55 50 49 60 58 53
 48 58 49 59 59 61 61 59 51 59 52 56 54 54 60 50 44 70 56 56 49 40 46 67
 54 54 44 52 55 53 60 60 61 44 52 51 47 51 53 47 51 58 44 50 54 60 43 53
 45 49 52 46 49 42 49 53 49 52 56 65 42 50 51 56 51 57 50 61 55 54 54 49
 53 69 44 53 59 65 49 59 50 55 48 68 47 53 47 61 51 47 57 52 59 50 57 55
 61 56 49 58 60 51 57 69 51 55 55 56 54 48 56 58 60 56 50 46 52 60 50 55
 41 47 49 52 56 57 54 42 61 48 51 68 43 48 55 50 66 58 40 57 52 51 62 43
 67 65 62 46 48 52 53 64 61 34 51 55 48 56 61 56 63 57 54 53 55 48 47 56
 51 53 55 58 48 58 48 51 48 45 53 53 44 52 43 59 61 44 53 57 54 54 46 51
 68 62 40 50 52 46 52 51 44 48 47 44 49 51 45 51 62 67 52 62 50 56 38 60
 59 44 57 62 61 44 56 57 46 54 51 45 50 57 57 44 50 55 64 53 58

In [44]:
# Task 6: Calculate Quarterly Temperature Averages
# ------------------------------------------------
# Cut the day axis into 4 consecutive chunks. np.array_split tolerates a day
# count that is not divisible by 4, so the chunks may differ in length.
quarters = np.array_split(temperature_data, 4, axis=1)

# For every chunk, compute one NaN-ignoring mean per location (row)
quarterly_averages = []
for quarter_block in quarters:
    per_location_mean = np.nanmean(quarter_block, axis=1)
    quarterly_averages.append(per_location_mean)

print("Quarterly average temperatures per location (Celsius):", quarterly_averages)

Quarterly average temperatures per location (Celsius): [array([15.73741967, 16.08689168, 15.93363686, 14.679471  , 14.65307293,
       16.17160107, 14.97757964, 15.5121542 , 15.44708152, 15.3051609 ,
       12.92665902, 14.47155199, 13.57732528, 17.04507553, 12.72179988,
       14.93889023, 15.81514576, 11.97967555, 13.25992424, 14.13093744,
       16.69676546, 13.73184339, 15.32948017, 13.10099437, 15.93594939,
       18.0939397 , 14.70784306, 13.68365509, 15.33556451, 17.95568805,
       11.60388321, 15.64922913, 12.95920375, 13.98876756, 14.72910663,
       16.96601784, 14.5839644 , 14.07178644, 16.71824745, 14.8619737 ,
       14.11917513, 15.23262515, 13.63456672, 16.38592639, 16.4146364 ,
       14.64468074, 17.25411355, 16.29944753, 14.47197685, 17.13642606,
       14.27112378, 15.77806316, 17.17593608, 14.99851445, 14.99367975,
       17.16778258, 14.98903345, 14.19764943, 16.9071328 , 15.82641085,
       14.47733786, 17.20635627, 14.31947796, 15.03757766, 12.34871839,
       1

In [46]:
# Task 7: Classify Humidity Levels
# ---------------------------------
# Label every (location, day) humidity reading:
#   "Missing"  - reading is NaN
#   "Dry"      - below 30
#   "Humid"    - above 70
#   "Moderate" - everything else (30 to 70 inclusive)
# NaN compares False against both thresholds, so without the explicit NaN
# check a missing reading would silently fall through to "Moderate".
humidity_classification = np.where(
    np.isnan(humidity_data), "Missing",
    np.where(humidity_data < 30, "Dry",
             np.where(humidity_data > 70, "Humid", "Moderate")))

# Count total "Dry" and "Humid" days per location (row-wise)
dry_days_count = np.sum(humidity_classification == "Dry", axis=1)
humid_days_count = np.sum(humidity_classification == "Humid", axis=1)

print("Total 'Dry' days per location:", dry_days_count)
print("Total 'Humid' days per location:", humid_days_count)


Total 'Dry' days per location: [ 99 100 107 101 102 102 101 107  99  98 102 108 101  98 103 106 106 111
  94 107 111  98  98 102 109 107  92 107 106 105  97 114 108 109 120  96
 103 120 113 101  97 103 106 101 117  97 106 101 102 110 106  98  96 108
  88 104 104 105 101  83  94  96 101  98  91  89  97 116 105 106 110  89
 114  78 107 109 115  98  94 105 132 102  90 103 120 101 104 100 109  99
 101 102  96 111 124 107 119  92 117 122 103 111 110 131  97 104 107 103
 106 127 108 103 115 110 101 107 113 118  96 100  83 105  88 111 106  93
 108  99 105 112 104  97  88 106 100 109 109  95  91 101 102  97  94  98
 105  97 101  99 110  94  94  89  87 118  92 113 101 121 106 108 100 114
 105 100 100 115 120 114  98 111 104 126  97  88 113  99  88 101 107  97
 107  88 111  84 120  96 102 114  85  98 110 102  97  91  91 108  90 114
 111  98 112  98  92 106 102 105 106  92  98 101  98  98 100 102  94  96
  97 102 103 120  97  99 128  97 102 105  89 107  98 107 121 116 109 109
 105  97  99 104 103

In [48]:
# Task 8: Apply Daily Pressure Trend to Temperature Data
# -------------------------------------------------------
# One sine cycle sampled at num_days evenly spaced points from 0 to 2*pi
# (both endpoints included), scaled to an amplitude of 10.
day_phase = np.linspace(0, 2 * np.pi, num_days)
pressure_trend = 10 * np.sin(day_phase)

# The trend has one value per day; adding it as a single row broadcasts the
# same daily offset to every location. NaN temperatures remain NaN.
temperature_data_adjusted = temperature_data + pressure_trend[np.newaxis, :]

print("Temperature data after applying daily pressure trend adjustment (Celsius):")
print(temperature_data_adjusted)

Temperature data after applying daily pressure trend adjustment (Celsius):
[[27.1848426  20.44834981 -1.09261022 ... 35.25996246 29.80895601
   2.86188426]
 [-7.49249367 26.90975073 -1.47185361 ... 11.62986739 39.76312095
  22.94742626]
 [-3.08795681 10.89864335 26.04307105 ... 34.58537211 23.48338124
  -8.57799921]
 ...
 [10.84471936 39.8105607  24.99138505 ... 27.63269784 37.2049571
  30.79337724]
 [ 1.85936443 30.86074704  9.85314555 ...  3.93808855 10.7920526
   9.17345544]
 [11.72318619 35.61693989  8.99510274 ... -3.80472165  9.31756693
   4.66431459]]
