In [8]:
"""
Wind Statistics
----------------

Topics: Using array methods over different axes, fancy indexing.

1. The data in 'wind.data' has the following format::

        61  1  1 15.04 14.96 13.17  9.29 13.96  9.87 13.67 10.25 10.83 12.58 18.50 15.04
        61  1  2 14.71 16.88 10.83  6.50 12.62  7.67 11.50 10.04  9.79  9.67 17.54 13.83
        61  1  3 18.50 16.88 12.33 10.13 11.17  6.17 11.25  8.04  8.50  7.67 12.75 12.71

   The first three columns are year, month and day.  The
   remaining 12 columns are average windspeeds in knots at 12
   locations in Ireland on that day.

   Use the 'loadtxt' function from numpy to read the data into
   an array.

2. Calculate the min, max and mean windspeeds and standard deviation of the
   windspeeds over all the locations and all the times (a single set of numbers
   for the entire dataset).

3. Calculate the min, max and mean windspeeds and standard deviations of the
   windspeeds at each location over all the days (a different set of numbers
   for each location)

4. Calculate the min, max and mean windspeeds and standard deviations of the
   windspeeds across all the locations on each day (a different set of numbers
   for each day)

5. Find the location which has the greatest windspeed on each day (an integer
   column number for each day).

6. Find the year, month and day on which the greatest windspeed was recorded.

7. Find the average windspeed in January for each location.

You should be able to perform all of these operations without using a for
loop or other looping construct.

Bonus
~~~~~

1. Calculate the mean windspeed for each month in the dataset.  Treat
   January 1961 and January 1962 as *different* months. (hint: first find a
   way to create an identifier unique for each month. The second step might
   require a for loop.)

2. Calculate the min, max and mean windspeeds and standard deviations of the
   windspeeds across all locations for each week (assume that the first week
   starts on January 1 1961) for the first 52 weeks. This can be done without
   any for loop.

Bonus Bonus
~~~~~~~~~~~

Calculate the mean windspeed for each month without using a for loop.
(Hint: look at `searchsorted` and `add.reduceat`.)

Notes
~~~~~

These data were analyzed in detail in the following article:

   Haslett, J. and Raftery, A. E. (1989). Space-time Modelling with
   Long-memory Dependence: Assessing Ireland's Wind Power Resource
   (with Discussion). Applied Statistics 38, 1-50.


See :ref:`wind-statistics-solution`.
"""
import numpy as np
from numpy import loadtxt

# Parse the whitespace-separated table into a 2-D array: one row per day,
# holding year, month, day and then the 12 location windspeeds.
data = np.loadtxt("wind.data")
# Bare expression so the notebook echoes the array dimensions.
data.shape

(6574, 15)

In [9]:
# Statistics over the whole dataset: every location and every day at once.
# Columns 0-2 hold the date, so the windspeeds start at column 3.
min_all = np.min(data[:, 3:])
max_all = np.max(data[:, 3:])
mean_all = np.mean(data[:, 3:])
std_all = np.std(data[:, 3:])
print(min_all, max_all, mean_all, std_all, sep='\n')

0.0
42.54
10.22837377040868
5.603840181095793


In [10]:
# Statistics for each location: reducing along axis 0 collapses the days,
# leaving one value per windspeed column (12 values per statistic).
min_all = np.min(data[:, 3:], axis=0)
max_all = np.max(data[:, 3:], axis=0)
mean_all = np.mean(data[:, 3:], axis=0)
std_all = np.std(data[:, 3:], axis=0)
print(min_all, max_all, mean_all, std_all, sep='\n')

[0.67 0.21 1.5  0.   0.13 0.   0.   0.   0.   0.04 0.13 0.67]
[35.8  33.37 33.84 28.46 37.54 26.16 30.37 31.08 25.88 28.21 42.38 42.54]
[12.36371463 10.64644813 11.66010344  6.30627472 10.45688013  7.09225434
  9.7968345   8.49442044  8.49581838  8.70726803 13.121007   15.59946152]
[5.61918301 5.26820081 5.00738377 3.60513309 4.93536333 3.96838126
 4.97689374 4.49865783 4.16746101 4.50327222 5.83459319 6.69734719]


In [11]:
# Statistics for each day: reducing along axis 1 collapses the locations,
# leaving one value per row (i.e. per day) for each statistic.
min_all = np.min(data[:, 3:], axis=1)
max_all = np.max(data[:, 3:], axis=1)
mean_all = np.mean(data[:, 3:], axis=1)
std_all = np.std(data[:, 3:], axis=1)
print(min_all, max_all, mean_all, std_all, sep='\n')

[9.29 6.5  6.17 ... 8.71 9.13 9.59]
[18.5  17.54 18.5  ... 29.58 28.79 27.29]
[13.09666667 11.79833333 11.34166667 ... 14.89       15.3675
 15.4025    ]
[2.5773188  3.28972854 3.50543348 ... 5.51175108 5.30456427 5.45971172]


In [12]:
# Strongest windspeed recorded on each day (row-wise maximum over the
# location columns).
# NOTE(review): exercise item 5 asks for the *location* with the greatest
# windspeed on each day, i.e. a column index (argmax over axis 1), whereas
# this cell reports the speed itself -- confirm which one is intended.
max_speed_each_day = np.max(data[:, 3:], axis=1)
print(max_speed_each_day)

[18.5  17.54 18.5  ... 29.58 28.79 27.29]


In [13]:
# Date on which the single greatest windspeed in the dataset was recorded.
# argmax() returns a position in the flattened windspeed array;
# unravel_index converts it back into (row, column) = (day, location).
max_windspeed_index = np.unravel_index(data[:, 3:].argmax(), data[:, 3:].shape)
# Look the date up through the computed row rather than a hard-coded row
# number, so the result stays correct if the data file changes.
date = data[max_windspeed_index[0], :3]
# Columns 0-2 are year, month, day; reversing prints day, month, year.
print(date[::-1])

[ 2. 12. 66.]


In [23]:
# Average windspeed in January for each location.
# Column 1 holds the month, so keep only the rows whose month equals 1 ...
jan_data = data[data[:, 1] == 1, :]
# ... then average each of the windspeed columns over those rows.
jan_mean = np.mean(jan_data[:, 3:], axis=0)
print(jan_mean)

[14.86955197 12.92166667 13.29962366  7.19949821 11.67571685  8.05483871
 11.81935484  9.5094086   9.54320789 10.05356631 14.55051971 18.02876344]


In [19]:
#Bonus1
# Mean windspeed (over all locations) for every calendar month, keyed by
# (year, month) so that e.g. January 61 and January 62 stay distinct.
speeds = {}
year = range(61, 79)   # two-digit years 61 .. 78, as stored in column 0
month = range(1,13)    # months 1 .. 12, as stored in column 1

for y in year:
    # Rows belonging to this year only.
    yearly_data = data[data[:,0] == y]
    for m in month:
        # Filter the *yearly* subset by month. Filtering the full array here
        # would pool the same month across all years and give every year the
        # same value for a given month.
        monthly_data = yearly_data[yearly_data[:,1] == m]
        mean = monthly_data[:, 3:].mean()
        speeds[(y, m)] = mean

print(len(speeds))

216


In [24]:
#Bonus2
# Weekly statistics across all locations for the first 52 weeks.
# One row per day, so the first 52 whole weeks are the first 52 * 7 rows.
first_year_data = data[:7 * 52]
# Lay each week out as a single row: 7 days x 12 locations = 84 speeds.
first_year_speeds = np.reshape(first_year_data[:, 3:], (-1, 12 * 7))
# Reducing along axis 1 yields one value per week; printed as mean, max,
# min and standard deviation, in that order.
print(
    np.mean(first_year_speeds, axis=1),
    np.max(first_year_speeds, axis=1),
    np.min(first_year_speeds, axis=1),
    np.std(first_year_speeds, axis=1),
    sep='\n',
)

[10.30154762  8.895       9.29952381 14.92047619 12.7902381  16.03654762
 13.69488095 11.7597619  13.05642857 10.07535714 12.7502381   9.80142857
 11.27690476  8.75619048  7.65988095  9.45642857  7.72511905 11.66607143
  9.49797619  7.80666667  7.18857143  9.00452381  8.875       9.0952381
 10.33083333 10.00547619 10.6002381  11.00452381  6.49833333  8.98964286
  9.41059524  7.79928571 12.4302381  11.87083333  9.44535714  8.19107143
 14.2775      7.46892857 11.56797619  7.62214286 10.17690476 14.2122619
 14.60130952 10.64011905  7.72940476  8.01428571  8.87488095  9.48154762
 11.85857143 14.06452381  8.65619048 10.34535714]
[18.5  20.71 20.79 27.63 27.71 26.38 28.62 29.63 25.8  22.71 22.95 21.54
 22.5  18.29 16.17 21.09 17.5  28.08 26.63 15.96 20.96 17.96 19.83 25.25
 24.71 21.87 21.29 22.5  21.42 25.37 20.25 14.58 24.3  22.29 24.71 20.25
 33.09 20.96 23.21 19.62 21.04 33.45 30.88 23.58 20.41 32.71 22.58 23.75
 29.33 25.62 24.41 29.33]
[1.79 0.5  1.04 2.17 3.63 8.08 3.42 2.21 5.66 1.71