In [13]:
import numpy as np

# Generate random temperature data in Celsius (-10 to 40 degrees) for 500 locations across 365 days
temperature_data = np.random.uniform(-10, 40, (500, 365))
temperature_data = np.round(temperature_data)  # Rounded to whole numbers

# Generate random humidity data (0 to 100 percent) for 500 locations across 365 days
humidity_data = np.random.uniform(0, 100, (500, 365))
humidity_data = np.round(humidity_data, 2)  # Rounded to 2 decimal places

print("Sample Temperature Data (Celsius):")
print(temperature_data[:5, :5])  # Display a small portion of the array for verification

print("\nSample Humidity Data (%):")
print(humidity_data[:5, :5])  # Display a small portion of the array for verification


Sample Temperature Data (Celsius):
[[ 6. 32. 35. -5. 24.]
 [32. 14.  5. 40.  3.]
 [ 9.  8. 36. 13. 15.]
 [-6. 13. -7.  0. 19.]
 [ 7. 39. 15. 19. 25.]]

Sample Humidity Data (%):
[[93.42  0.31 72.94 62.68 23.69]
 [83.65 12.14 66.37 20.5  96.97]
 [91.67 70.45 56.91 39.42 40.08]
 [24.11 97.31 70.79 27.74 68.82]
 [69.23 40.08 10.9  72.71 61.26]]


In [14]:
# Simulate 5% missing data in temperature and humidity arrays
num_missing = int(0.05 * temperature_data.size)

# Generate random indices for missing data
missing_indices = (np.random.randint(0, 500, num_missing), np.random.randint(0, 365, num_missing))

# Set selected indices to -1 for both temperature and humidity data to represent missing values
temperature_data[missing_indices] = -1
humidity_data[missing_indices] = -1

# Count the number of missing values in each array (count occurrences of -1)
missing_temp_count = np.sum(temperature_data == -1)
missing_humidity_count = np.sum(humidity_data == -1)

print(f"Total missing entries in temperature data: {missing_temp_count}")
print(f"Total missing entries in humidity data: {missing_humidity_count}")

Total missing entries in temperature data: 12443
Total missing entries in humidity data: 8923


In [15]:
# Convert Celsius to Fahrenheit
temperature_data_fahrenheit = temperature_data * 9 / 5 + 32

# Calculate "feels like" discomfort index
discomfort_index = 0.5 * (temperature_data_fahrenheit + 61.0 + 
                           ((temperature_data_fahrenheit - 68.0) * 1.2) + 
                           (humidity_data * 0.094))

# Cap any values in discomfort index above 80
discomfort_index = np.where(discomfort_index > 80, 80, discomfort_index)
discomfort_index = np.round(discomfort_index, 2)

print("\nCapped Discomfort Index (with max value 80):")
print(discomfort_index)



Capped Discomfort Index (with max value 80):
[[41.17 22.87 80.   ... 80.   80.   53.67]
 [80.   53.19 37.92 ... 43.93 80.   55.3 ]
 [47.03 44.05 80.   ... 52.69 53.29 12.3 ]
 ...
 [79.03 80.   30.29 ... 61.91 30.85 78.51]
 [61.91 19.59 22.42 ... 24.08 73.71 35.19]
 [44.43 55.73 80.   ... 70.74 22.87 20.02]]


In [16]:
# Extract January (first 31 days) temperature data
january_temperatures = temperature_data[:, :31]

# Calculate the average January temperature across all locations
average_january_temp = np.mean(january_temperatures[january_temperatures != -1])  # Exclude -1 values
print(f"Average January Temperature across all locations: {average_january_temp:.2f}°C")


Average January Temperature across all locations: 15.23°C


In [18]:
# Mark temperatures exceeding 35°C as potential errors by setting them to -1
temperature_data = np.where(temperature_data > 35, -1, temperature_data)

# Count the number of missing values per location
null_values_per_location = np.sum(temperature_data == -1, axis=1)
print("Null values per location after marking extreme temperatures:")
print(null_values_per_location)


Null values per location after marking extreme temperatures:
[50 50 63 56 60 59 63 42 68 51 54 54 56 45 52 59 55 61 59 53 48 57 53 47
 49 52 51 54 51 53 41 66 57 66 60 56 55 51 61 66 68 48 65 49 69 52 50 50
 43 55 62 60 62 60 51 48 49 61 58 54 47 71 49 53 50 55 59 58 57 67 65 52
 48 54 47 60 45 56 48 54 65 56 50 54 60 61 64 65 55 50 46 63 52 53 49 68
 76 51 58 62 54 43 61 57 65 50 57 63 66 51 54 54 47 52 64 71 55 52 60 63
 40 61 65 70 48 62 54 48 63 58 63 52 60 56 57 59 60 48 47 55 56 57 50 54
 63 47 39 60 52 44 53 52 52 53 62 58 61 59 61 63 60 60 61 53 65 39 60 50
 52 64 58 55 59 59 58 52 69 56 58 49 54 55 54 54 54 50 66 45 61 57 54 57
 52 64 61 54 54 66 68 64 59 51 68 54 60 62 53 57 71 53 49 54 61 73 49 54
 49 43 54 52 54 60 63 70 62 51 57 54 50 59 51 60 58 61 54 62 56 58 61 48
 64 60 52 47 56 57 55 58 60 46 57 63 60 64 59 63 70 52 54 63 58 54 50 58
 56 46 45 52 44 54 49 54 68 57 57 67 54 68 55 53 56 60 44 40 56 47 37 64
 64 59 53 57 58 58 52 57 48 55 55 44 48 52 72 55 60 54 54 61 55

In [25]:
# Assuming temperature_data is available and is a 2D array (500, 365)

# Define the number of days in each quarter (considering the last quarter takes the remaining days)
days_in_quarter = [91, 91, 91, 92]  # Q1, Q2, Q3, Q4

# Initialize an array to hold quarterly averages
quarterly_avg_temp = np.zeros((500, 4))

# Calculate averages for each quarter
start_day = 0
for i in range(4):
    # Extract the quarter's data
    quarter_data = temperature_data[:, start_day:start_day + days_in_quarter[i]]
    
    # Replace -1 with NaN for average calculation to ignore these values
    quarter_data = np.where(quarter_data == -1, np.nan, quarter_data)
    
    # Calculate the average for this quarter, ignoring NaNs
    quarterly_avg_temp[:, i] = np.nanmean(quarter_data, axis=1)
    
    # Update the starting day for the next quarter
    start_day += days_in_quarter[i]

print("Quarterly average temperatures for each location:")
print(quarterly_avg_temp)


Quarterly average temperatures for each location:
[[12.96103896 11.54545455 12.4691358  16.3125    ]
 [10.91025641 11.97368421 12.40789474 12.37647059]
 [10.94871795 13.03947368 12.31081081 12.18918919]
 ...
 [13.28571429 11.43055556 14.49333333 12.85882353]
 [11.22666667 12.95        8.78481013 15.5125    ]
 [10.77777778 12.83544304 14.0125     11.38961039]]


In [23]:
# Classify humidity levels as "Dry" if below 30% and "Humid" if above 70%
humidity_classification = np.where(humidity_data < 30, 'Dry',
                                    np.where(humidity_data > 70, 'Humid', 'Moderate'))

# Count the total number of "Dry" and "Humid" days for each location
dry_days_count = np.sum(humidity_classification == 'Dry', axis=1)
humid_days_count = np.sum(humidity_classification == 'Humid', axis=1)

print("Total number of Dry days for each location:")
print(dry_days_count)
print("Total number of Humid days for each location:")
print(humid_days_count)

Total number of Dry days for each location:
[132 127 137 118 128 121 128 126 131 118 113 120 137 123 125 121 127 106
 149 115 117 145 132 128 122 116 103 115 117 119 117 129 127 141 107 127
 123 110 135 128 128 129 132 106 142 114 115 101 114 116 128 127 121 119
 129 118 107 129 123 108 122 125 108 129 110 135 123 123 127 106 123 129
 122 135 125 129 119 139 123 123 114 111 116 113 128 129 132 118 115 116
 123 135 132 128 121 130 122 116 128 113 121 119 125 124 126 135 137 128
 122 119 114 109 110 123 108 133  98 147 113 130 114 118 120 125 123 137
 129 130 121 114 120 122 131 115 127 132 111 104 119 135 136 113 130 127
 116 125 111 108 112 102 121 130 137 124 133 137 108 128 128 141 140 130
 117 118 106 117 119 131 127 118 104 104 118 114 136 133 115 129 106 125
 127 110 122 106 133 113 119 116 112 118 130 119 131 120 121 135 112 130
 109 130 112 115 131 131 130 115 112 127 132 125 124 111 124 126 119 128
 105 111 116 119 124 111 116 116 128 114 114 119 112 136 121 107 130 106
 125 11

In [24]:
# Simulate daily pressure trends (this is just an example; modify as needed)
pressure_trend = np.random.uniform(-5, 5, (365))  # Random trend for demonstration

# Apply daily pressure trend to temperature data
adjusted_temperature_data = temperature_data + pressure_trend

# Display a small portion of the adjusted temperature data for verification
print("Adjusted temperature data after applying daily pressure trend:")
print(adjusted_temperature_data[:5, :5])

Adjusted temperature data after applying daily pressure trend:
[[ 9.87538449 -0.25050624 33.06030183 -6.42025487 22.70119371]
 [35.87538449 14.74949376  3.06030183 -2.42025487  1.70119371]
 [12.87538449  8.74949376 -2.93969817 11.57974513 13.70119371]
 [-2.12461551 13.74949376 -8.93969817 -1.42025487 17.70119371]
 [ 2.87538449 -0.25050624 13.06030183 17.57974513 23.70119371]]
