In [1]:
import pandas as pd
file_path = "/home/marko/projects/tf216/nk/nkfinal.csv"

In [2]:
df = pd.read_csv(file_path, parse_dates=['DATE'], index_col='DATE')

# Dodavanje kolona za mjesec i dan radi lakšeg grupisanja
df['MONTH'] = df.index.month
df['DAY'] = df.index.day

# Kreiranje maskiranog DataFrame-a za period 2002-2023
df = df.loc['2002-01-01':'2023-12-31']

# Funkcija za računanje rangova po mjesecu
def calculate_monthly_ranks(var_name, rank_col_name):
    ranks = []

    for month in range(1, 13):
        # Filter podataka za trenutni mjesec
        month_data = df[df['MONTH'] == month]

        # Dobijanje rastućih rangova za sve dane tog mjeseca
        month_ranks = month_data[var_name].rank(method='min').astype(int)

        # Upis rangova na odgovarajuće pozicije u glavnom DataFrame-u
        df.loc[month_data.index, rank_col_name] = month_ranks

# Računanje rangova za RHUM i TEMP
calculate_monthly_ranks('RHUM', 'LRRHUM')
calculate_monthly_ranks('TEMP', 'LRTEMP')
calculate_monthly_ranks('WDSP', 'LRWDSP')


# Pregled rezultata
print(df[['RHUM', 'LRRHUM', 'TEMP', 'LRTEMP', 'WDSP', 'LRWDSP']].head(20))

             RHUM  LRRHUM  TEMP  LRTEMP  WDSP  LRWDSP
DATE                                                 
2002-01-01  0.453    49.0  -5.6    14.0   5.6   628.0
2002-01-02  0.614   195.0  -4.6    20.0   1.5   186.0
2002-01-03  0.526   104.0  -6.6    12.0   4.6   584.0
2002-01-04  0.444    43.0  -7.1    11.0   5.4   617.0
2002-01-05  0.437    36.0  -4.7    19.0   5.9   641.0
2002-01-06  0.545   126.0  -0.1   137.0   4.7   587.0
2002-01-07  0.551   133.0   2.4   326.0   5.4   617.0
2002-01-08  0.593   176.0   1.7   263.0   4.4   578.0
2002-01-09  0.638   220.0   0.9   218.0   3.5   502.0
2002-01-10  0.681   275.0   0.4   178.0   0.3    23.0
2002-01-11  0.751   347.0   0.4   178.0   0.1    17.0
2002-01-12  0.756   355.0   0.2   154.0   0.8    63.0
2002-01-13  0.670   268.0   0.1   148.0   0.9    83.0
2002-01-14  0.774   373.0   0.6   189.0   0.4    34.0
2002-01-15  0.911   503.0   1.7   263.0   0.0     1.0
2002-01-16  0.923   513.0   0.3   166.0   0.5    38.0
2002-01-17  0.964   562.0   

In [3]:
# Funkcija za računanje kratkoročnih rangova po mjesecu i godini
def calculate_short_term_ranks(var_name, rank_col_name):
    ranks = []

    # Grupisanje po godini i mjesecu
    grouped = df.groupby([df.index.year, df.index.month])

    for (year, month), group in grouped:
        # Dobijanje rastućih rangova za sve dane u tom mjesecu i godini
        group_ranks = group[var_name].rank(method='min').astype(int)

        # Upis rangova na odgovarajuće pozicije u glavnom DataFrame-u
        df.loc[group.index, rank_col_name] = group_ranks

# Računanje kratkoročnih rangova za RHUM, TEMP i WDSP
calculate_short_term_ranks('RHUM', 'SRRHUM')
calculate_short_term_ranks('TEMP', 'SRTEMP')
calculate_short_term_ranks('WDSP', 'SRWDSP')

# Pregled rezultata
print(df[['RHUM', 'LRRHUM', 'SRRHUM', 'TEMP', 'LRTEMP', 'SRTEMP', 'WDSP', 'LRWDSP', 'SRWDSP']].head(31))

             RHUM  LRRHUM  SRRHUM  TEMP  LRTEMP  SRTEMP  WDSP  LRWDSP  SRWDSP
DATE                                                                         
2002-01-01  0.453    49.0     3.0  -5.6    14.0     3.0   5.6   628.0    30.0
2002-01-02  0.614   195.0     8.0  -4.6    20.0     5.0   1.5   186.0    20.0
2002-01-03  0.526   104.0     4.0  -6.6    12.0     2.0   4.6   584.0    26.0
2002-01-04  0.444    43.0     2.0  -7.1    11.0     1.0   5.4   617.0    28.0
2002-01-05  0.437    36.0     1.0  -4.7    19.0     4.0   5.9   641.0    31.0
2002-01-06  0.545   126.0     5.0  -0.1   137.0     6.0   4.7   587.0    27.0
2002-01-07  0.551   133.0     6.0   2.4   326.0    21.0   5.4   617.0    28.0
2002-01-08  0.593   176.0     7.0   1.7   263.0    18.0   4.4   578.0    25.0
2002-01-09  0.638   220.0     9.0   0.9   218.0    16.0   3.5   502.0    24.0
2002-01-10  0.681   275.0    11.0   0.4   178.0    12.0   0.3    23.0     7.0
2002-01-11  0.751   347.0    13.0   0.4   178.0    12.0   0.1   

In [4]:
# Filtriranje dugoročnog perioda
long_term_df = df.loc['2002-01-01':'2023-12-31']

# Računanje broja dana po mjesecu u dugoročnom periodu
max_ranks_dynamic = long_term_df.groupby(long_term_df.index.month).size().to_dict()

# Funkcija za određivanje nazivnika u phi računu, bez fiksnih vrijednosti
def get_phi_denominator(date):
    month = date.month
    return max_ranks_dynamic[month] + 1

# Računanje phi vrijednosti za svaki dan
df['PHIRHUM'] = df.apply(lambda row: row['LRRHUM'] / get_phi_denominator(row.name) if pd.notnull(row['LRRHUM']) else None, axis=1)
df['PHITEMP'] = df.apply(lambda row: row['LRTEMP'] / get_phi_denominator(row.name) if pd.notnull(row['LRTEMP']) else None, axis=1)
df['PHIWDSP'] = df.apply(lambda row: row['LRWDSP'] / get_phi_denominator(row.name) if pd.notnull(row['LRWDSP']) else None, axis=1)

# Pregled rezultata
print(df[['LRRHUM', 'PHIRHUM', 'LRTEMP', 'PHITEMP', 'LRWDSP', 'PHIWDSP']].head(50))


            LRRHUM   PHIRHUM  LRTEMP   PHITEMP  LRWDSP   PHIWDSP
DATE                                                            
2002-01-01    49.0  0.071742    14.0  0.020498   628.0  0.919473
2002-01-02   195.0  0.285505    20.0  0.029283   186.0  0.272328
2002-01-03   104.0  0.152269    12.0  0.017570   584.0  0.855051
2002-01-04    43.0  0.062958    11.0  0.016105   617.0  0.903367
2002-01-05    36.0  0.052709    19.0  0.027818   641.0  0.938507
2002-01-06   126.0  0.184480   137.0  0.200586   587.0  0.859444
2002-01-07   133.0  0.194729   326.0  0.477306   617.0  0.903367
2002-01-08   176.0  0.257687   263.0  0.385066   578.0  0.846266
2002-01-09   220.0  0.322108   218.0  0.319180   502.0  0.734993
2002-01-10   275.0  0.402635   178.0  0.260615    23.0  0.033675
2002-01-11   347.0  0.508053   178.0  0.260615    17.0  0.024890
2002-01-12   355.0  0.519766   154.0  0.225476    63.0  0.092240
2002-01-13   268.0  0.392387   148.0  0.216691    83.0  0.121523
2002-01-14   373.0  0.546

In [5]:
# Računanje broja dana po godini i mjesecu u skupu podataka
f_denominators = df.groupby([df.index.year, df.index.month]).size().to_dict()

# Funkcija za određivanje nazivnika u F računu
def get_F_denominator(date):
    year = date.year
    month = date.month
    return f_denominators[(year, month)] + 1

# Računanje F vrijednosti za svaki dan
df['FRHUM'] = df.apply(lambda row: row['SRRHUM'] / get_F_denominator(row.name) if pd.notnull(row['SRRHUM']) else None, axis=1)
df['FTEMP'] = df.apply(lambda row: row['SRTEMP'] / get_F_denominator(row.name) if pd.notnull(row['SRTEMP']) else None, axis=1)
df['FWDSP'] = df.apply(lambda row: row['SRWDSP'] / get_F_denominator(row.name) if pd.notnull(row['SRWDSP']) else None, axis=1)

# Pregled rezultata
print(df[['SRRHUM', 'FRHUM', 'SRTEMP', 'FTEMP', 'SRWDSP', 'FWDSP']].head(50))


            SRRHUM     FRHUM  SRTEMP     FTEMP  SRWDSP     FWDSP
DATE                                                            
2002-01-01     3.0  0.093750     3.0  0.093750    30.0  0.937500
2002-01-02     8.0  0.250000     5.0  0.156250    20.0  0.625000
2002-01-03     4.0  0.125000     2.0  0.062500    26.0  0.812500
2002-01-04     2.0  0.062500     1.0  0.031250    28.0  0.875000
2002-01-05     1.0  0.031250     4.0  0.125000    31.0  0.968750
2002-01-06     5.0  0.156250     6.0  0.187500    27.0  0.843750
2002-01-07     6.0  0.187500    21.0  0.656250    28.0  0.875000
2002-01-08     7.0  0.218750    18.0  0.562500    25.0  0.781250
2002-01-09     9.0  0.281250    16.0  0.500000    24.0  0.750000
2002-01-10    11.0  0.343750    12.0  0.375000     7.0  0.218750
2002-01-11    13.0  0.406250    12.0  0.375000     5.0  0.156250
2002-01-12    14.0  0.437500    10.0  0.312500    14.0  0.437500
2002-01-13    10.0  0.312500     9.0  0.281250    15.0  0.468750
2002-01-14    17.0  0.531

In [6]:
# Računanje apsolutnih vrijednosti razlika između F i phi za svaki parametar
df['ABSRHUM'] = (df['FRHUM'] - df['PHIRHUM']).abs()
df['ABSTEMP'] = (df['FTEMP'] - df['PHITEMP']).abs()
df['ABSWDSP'] = (df['FWDSP'] - df['PHIWDSP']).abs()

# Pregled rezultata
print(df[['FRHUM', 'PHIRHUM', 'ABSRHUM', 'FTEMP', 'PHITEMP', 'ABSTEMP', 'FWDSP', 'PHIWDSP', 'ABSWDSP']].head(50))


               FRHUM   PHIRHUM   ABSRHUM     FTEMP   PHITEMP   ABSTEMP  \
DATE                                                                     
2002-01-01  0.093750  0.071742  0.022008  0.093750  0.020498  0.073252   
2002-01-02  0.250000  0.285505  0.035505  0.156250  0.029283  0.126967   
2002-01-03  0.125000  0.152269  0.027269  0.062500  0.017570  0.044930   
2002-01-04  0.062500  0.062958  0.000458  0.031250  0.016105  0.015145   
2002-01-05  0.031250  0.052709  0.021459  0.125000  0.027818  0.097182   
2002-01-06  0.156250  0.184480  0.028230  0.187500  0.200586  0.013086   
2002-01-07  0.187500  0.194729  0.007229  0.656250  0.477306  0.178944   
2002-01-08  0.218750  0.257687  0.038937  0.562500  0.385066  0.177434   
2002-01-09  0.281250  0.322108  0.040858  0.500000  0.319180  0.180820   
2002-01-10  0.343750  0.402635  0.058885  0.375000  0.260615  0.114385   
2002-01-11  0.406250  0.508053  0.101803  0.375000  0.260615  0.114385   
2002-01-12  0.437500  0.519766  0.0822

In [7]:
# Kreiranje kolone PERIOD u formatu 'YYYY-MM'
df['PERIOD'] = df.index.to_period('M').astype(str)

# Grupisanje po periodi i sumiranje apsolutnih razlika
fs_df = df.groupby('PERIOD').agg({
    'ABSRHUM': 'sum',
    'ABSTEMP': 'sum',
    'ABSWDSP': 'sum'
}).reset_index()

# Preimenovanje kolona
fs_df = fs_df.rename(columns={
    'ABSRHUM': 'FSRHUM',
    'ABSTEMP': 'FSTEMP',
    'ABSWDSP': 'FSWDSP'
})

# Dodavanje kolone FSSUM kao zbira FS statistika za RHUM i TEMP
fs_df['FSSUM'] = fs_df['FSRHUM'] + fs_df['FSTEMP']

# Pregled rezultata
print(fs_df.head(50))


     PERIOD    FSRHUM    FSTEMP    FSWDSP      FSSUM
0   2002-01  2.256772  3.514458  5.985725   5.771230
1   2002-02  3.407362  4.194700  5.664264   7.602062
2   2002-03  2.196010  4.308748  2.461567   6.504758
3   2002-04  3.904885  2.229857  5.783368   6.134742
4   2002-05  3.880307  3.375778  6.963443   7.256085
5   2002-06  1.851837  1.472061  2.268606   3.323898
6   2002-07  5.838122  2.622392  4.462116   8.460514
7   2002-08  9.842194  8.478816  5.420754  18.321010
8   2002-09  6.645991  7.435801  3.245815  14.081792
9   2002-10  4.764367  6.765877  5.037610  11.530243
10  2002-11  2.328193  0.976233  1.754185   3.304426
11  2002-12  3.930820  1.537976  4.724973   5.468796
12  2003-01  2.364156  3.889367  1.843887   6.253523
13  2003-02  6.730070  9.434194  4.550449  16.164264
14  2003-03  7.581991  2.764916  1.586567  10.346907
15  2003-04  4.888146  1.722512  5.597775   6.610658
16  2003-05  5.776720  6.341966  2.729777  12.118686
17  2003-06  5.354009  6.951979  1.882387  12.

In [8]:
# Dodavanje pomoćne kolone MONTH za grupisanje (iz PERIOD)
fs_df['MONTH'] = fs_df['PERIOD'].str[5:7]

# Odabir po tri mjeseca sa najmanjim FSSUM vrijednostima za svaki mjesec
top3_df = fs_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(3, 'FSSUM'))

# Slaganje po mjesecu (od 01 do 12)
top3_df = top3_df.sort_values(by='MONTH').reset_index(drop=True)

# Pregled rezultata
print(top3_df)


     PERIOD    FSRHUM    FSTEMP    FSWDSP     FSSUM MONTH
0   2011-01  2.310944  1.649616  3.226574  3.960560    01
1   2016-01  2.515190  1.658904  2.422355  4.174094    01
2   2022-01  3.067121  1.229228  3.123399  4.296349    01
3   2004-02  1.808682  1.359378  1.594962  3.168060    02
4   2018-02  1.953210  1.936024  4.387682  3.889234    02
5   2011-02  2.774310  1.239273  2.234727  4.013582    02
6   2011-03  1.781936  1.606698  1.996523  3.388635    03
7   2015-03  1.441252  2.821376  2.497255  4.262628    03
8   2007-03  1.825037  2.443768  1.847776  4.268805    03
9   2010-04  1.271827  1.175443  1.742326  2.447270    04
10  2006-04  2.189059  1.198575  6.386023  3.387634    04
11  2019-04  2.040262  1.468742  4.001025  3.509004    04
12  2021-05  1.121202  1.543192  3.713992  2.664394    05
13  2013-05  2.291133  0.871934  5.949854  3.163067    05
14  2012-05  1.776034  1.413891  4.774982  3.189925    05
15  2015-06  1.325606  2.004831  5.250403  3.330438    06
16  2008-06  1

  top3_df = fs_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(3, 'FSSUM'))


In [9]:
# Odabir po jednog predstavnika za svaki mjesec sa minimalnom vrijednošću FSWDSP
final_selection_df = top3_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(1, 'FSWDSP'))

# Slaganje po mjesecu (od 01 do 12)
final_selection_df = final_selection_df.sort_values(by='MONTH').reset_index(drop=True)

# Pregled rezultata
print(final_selection_df)


     PERIOD    FSRHUM    FSTEMP    FSWDSP     FSSUM MONTH
0   2016-01  2.515190  1.658904  2.422355  4.174094    01
1   2004-02  1.808682  1.359378  1.594962  3.168060    02
2   2007-03  1.825037  2.443768  1.847776  4.268805    03
3   2010-04  1.271827  1.175443  1.742326  2.447270    04
4   2021-05  1.121202  1.543192  3.713992  2.664394    05
5   2002-06  1.851837  1.472061  2.268606  3.323898    06
6   2019-07  1.447566  1.312546  3.666179  2.760112    07
7   2007-08  3.059938  1.846038  1.939193  4.905975    08
8   2022-09  1.855400  0.947343  1.747108  2.802743    09
9   2016-10  1.589632  2.954520  2.572795  4.544153    10
10  2002-11  2.328193  0.976233  1.754185  3.304426    11
11  2013-12  1.937408  2.365437  2.550146  4.302846    12


  final_selection_df = top3_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(1, 'FSWDSP'))


In [11]:
# Lista referentnih mjeseci (perioda) koje ćemo koristiti za izbor podataka
reference_periods = final_selection_df['PERIOD'].tolist()

# Filtriranje dnevnih zapisa iz df koji pripadaju ovim periodama
ry_df = df[df['PERIOD'].isin(reference_periods)][['RHUM', 'MAX', 'MIN', 'TEMP', 'WDSP']].copy()

# Dodavanje DATE kolone iz indeksa (pošto je indeks datum)
ry_df['DATE'] = ry_df.index

# Zamjena godine u datumima na 2024
ry_df['DATE'] = ry_df['DATE'].apply(lambda d: d.replace(year=2024))

# Postavljanje DATE kao indeksa
ry_df = ry_df.set_index('DATE')

# Sortiranje redova po indeksu (kako bi mjeseci bili hronološki od januara do decembra)
ry_df = ry_df.sort_index()

# Eksportovanje rezultata u CSV
ry_df.to_csv('rynk.csv')

# Pregled prvih par redova
print(ry_df.head(50))


             RHUM   MAX   MIN  TEMP  WDSP
DATE                                     
2024-01-01  0.562   5.2  -8.5  -1.6   2.0
2024-01-02  0.734   6.5  -6.3   0.3   2.2
2024-01-03  0.986   2.2  -0.6   0.8   1.2
2024-01-04  0.986   2.9   1.2   2.1   2.3
2024-01-05  0.952   7.0  -0.5   3.8   1.5
2024-01-06  0.993   7.9   4.2   5.8   2.0
2024-01-07  0.784   6.0  -0.5   3.5   3.4
2024-01-08  0.814   9.9  -1.1   3.9   2.1
2024-01-09  0.651   6.4   2.7   4.7   1.2
2024-01-10  0.499   9.5   6.2   8.8   4.7
2024-01-11  0.485   9.9   8.4   8.9   6.3
2024-01-12  0.510  10.4   3.5   6.3   2.7
2024-01-13  0.986   4.7   0.4   2.4   1.9
2024-01-14  0.993   3.7  -2.6   0.3   1.4
2024-01-15  0.703   5.7   1.4   3.5   2.1
2024-01-16  0.752   2.7  -0.2   0.8   4.3
2024-01-17  0.519  -0.4  -3.7  -1.9   5.9
2024-01-18  0.470  -3.0  -5.9  -4.8   6.4
2024-01-19  0.529   2.2 -10.1  -4.1   2.7
2024-01-20  0.450   6.2  -9.3  -1.6   2.3
2024-01-21  0.614   3.7  -6.1  -0.8   2.4
2024-01-22  0.325   0.7  -5.9  -2.