In [1]:
import pandas as pd
# Location of the input CSV that is loaded with pd.read_csv in the next cell.
# NOTE(review): machine-specific absolute path — adjust before running on another machine.
file_path = "/home/marko/projects/tf216/pv/pvfinal.csv"

In [2]:
# Load the records; the DATE column is parsed as datetimes and becomes the index.
df = pd.read_csv(file_path, parse_dates=['DATE'], index_col='DATE')

# Helper columns holding the month and the day of month of every row.
# MONTH is the grouping key of the monthly ranking; DAY is kept for convenience.
df['MONTH'] = df.index.month
df['DAY'] = df.index.day

# Restrict the frame to the 2002-2023 period.
# .copy() makes the result an independent frame, so the columns that are
# added to df afterwards are never written into a slice of the frame that was
# originally loaded (the situation pandas reports as SettingWithCopyWarning).
df = df.loc['2002-01-01':'2023-12-31'].copy()

# Long-term ranks: rank of every value among all rows of the same calendar month
def calculate_monthly_ranks(var_name, rank_col_name, data=None):
    """Rank ``var_name`` within each calendar month and store the ranks in place.

    Rows are grouped by the ``MONTH`` column, so e.g. all Januaries of all
    years form one group. Ranks are ascending; ties share the lowest rank
    of their run (``method='min'``).

    Parameters
    ----------
    var_name : str
        Name of the column to rank.
    rank_col_name : str
        Name of the column that receives the ranks (created or overwritten).
    data : pandas.DataFrame, optional
        Frame to operate on. It must contain ``var_name`` and ``MONTH``.
        When omitted, the notebook-level ``df`` is used.

    Returns
    -------
    None
        The frame is modified in place.

    Notes
    -----
    Ranks are stored as floats. A missing observation has no rank (NaN) and
    NaN cannot be cast to ``int``, so no integer cast is applied; rows with a
    missing value simply keep a missing rank.
    """
    frame = df if data is None else data

    # One grouped rank call covers every month at once and stays aligned
    # with the frame row by row.
    frame[rank_col_name] = frame.groupby('MONTH')[var_name].rank(method='min')

# Long-term (calendar-month) ranks for RHUM, TEMP and WDSP
for source_col, rank_col in (('RHUM', 'LRRHUM'), ('TEMP', 'LRTEMP'), ('WDSP', 'LRWDSP')):
    calculate_monthly_ranks(source_col, rank_col)


# Preview: every variable next to its long-term rank
preview_cols = ['RHUM', 'LRRHUM', 'TEMP', 'LRTEMP', 'WDSP', 'LRWDSP']
print(df[preview_cols].head(20))

             RHUM  LRRHUM  TEMP  LRTEMP  WDSP  LRWDSP
DATE                                                 
2002-01-01  0.849   342.0  -7.8    42.0   1.3   470.0
2002-01-02  0.816   208.0 -12.3    12.0   0.0     1.0
2002-01-03  0.775   117.0  -9.6    23.0   0.6   173.0
2002-01-04  0.846   329.0 -10.2    19.0   0.5   167.0
2002-01-05  0.808   177.0  -9.1    29.0   0.0     1.0
2002-01-06  0.837   293.0  -6.3    68.0   0.3   136.0
2002-01-07  0.829   260.0  -8.1    38.0   0.0     1.0
2002-01-08  0.839   299.0  -5.3    96.0   0.0     1.0
2002-01-09  0.855   363.0  -2.6   209.0   0.0     1.0
2002-01-10  0.869   402.0  -8.0    40.0   0.0     1.0
2002-01-11  0.897   491.0  -7.5    46.0   0.0     1.0
2002-01-12  0.837   293.0  -7.0    58.0   0.0     1.0
2002-01-13  0.897   491.0  -7.7    44.0   0.0     1.0
2002-01-14  0.863   385.0  -7.4    48.0   0.0     1.0
2002-01-15  0.851   351.0  -6.2    71.0   0.0     1.0
2002-01-16  0.817   210.0  -2.9   190.0   0.0     1.0
2002-01-17  0.883   447.0  -

In [3]:
# Short-term ranks: rank of every value among the rows of the same year and month
def calculate_short_term_ranks(var_name, rank_col_name, data=None):
    """Rank ``var_name`` within each (year, month) and store the ranks in place.

    Rows are grouped by the year and month of the datetime index, so each
    individual month of each individual year forms one group. Ranks are
    ascending; ties share the lowest rank of their run (``method='min'``).

    Parameters
    ----------
    var_name : str
        Name of the column to rank.
    rank_col_name : str
        Name of the column that receives the ranks (created or overwritten).
    data : pandas.DataFrame, optional
        Frame to operate on. It must contain ``var_name`` and have a datetime
        index. When omitted, the notebook-level ``df`` is used.

    Returns
    -------
    None
        The frame is modified in place.

    Notes
    -----
    Ranks are stored as floats. A missing observation has no rank (NaN) and
    NaN cannot be cast to ``int``, so no integer cast is applied; rows with a
    missing value simply keep a missing rank.
    """
    frame = df if data is None else data

    # Group on the index-derived year and month and rank inside every group
    # in a single call; the result stays aligned with the frame row by row.
    year_month_keys = [frame.index.year, frame.index.month]
    frame[rank_col_name] = frame.groupby(year_month_keys)[var_name].rank(method='min')

# Short-term (per year and month) ranks for RHUM, TEMP and WDSP
for source_col, rank_col in (('RHUM', 'SRRHUM'), ('TEMP', 'SRTEMP'), ('WDSP', 'SRWDSP')):
    calculate_short_term_ranks(source_col, rank_col)

# Preview: every variable next to its long-term and short-term rank
preview_cols = ['RHUM', 'LRRHUM', 'SRRHUM', 'TEMP', 'LRTEMP', 'SRTEMP', 'WDSP', 'LRWDSP', 'SRWDSP']
print(df[preview_cols].head(31))

             RHUM  LRRHUM  SRRHUM  TEMP  LRTEMP  SRTEMP  WDSP  LRWDSP  SRWDSP
DATE                                                                         
2002-01-01  0.849   342.0    18.0  -7.8    42.0     7.0   1.3   470.0    30.0
2002-01-02  0.816   208.0     9.0 -12.3    12.0     1.0   0.0     1.0     1.0
2002-01-03  0.775   117.0     4.0  -9.6    23.0     3.0   0.6   173.0    29.0
2002-01-04  0.846   329.0    17.0 -10.2    19.0     2.0   0.5   167.0    28.0
2002-01-05  0.808   177.0     8.0  -9.1    29.0     4.0   0.0     1.0     1.0
2002-01-06  0.837   293.0    13.0  -6.3    68.0    12.0   0.3   136.0    27.0
2002-01-07  0.829   260.0    12.0  -8.1    38.0     5.0   0.0     1.0     1.0
2002-01-08  0.839   299.0    15.0  -5.3    96.0    14.0   0.0     1.0     1.0
2002-01-09  0.855   363.0    20.0  -2.6   209.0    18.0   0.0     1.0     1.0
2002-01-10  0.869   402.0    23.0  -8.0    40.0     6.0   0.0     1.0     1.0
2002-01-11  0.897   491.0    27.0  -7.5    46.0     9.0   0.0   

In [4]:
# Slice of df that covers the long-term period 2002-2023
long_term_df = df.loc['2002-01-01':'2023-12-31']

# Number of rows per calendar month over the long-term period,
# stored as a {month: row_count} dictionary
month_of_row = long_term_df.index.month
rows_per_month = long_term_df.groupby(month_of_row).size()
max_ranks_dynamic = rows_per_month.to_dict()

# Denominator of the phi ratio, taken from the data instead of fixed values
def get_phi_denominator(date):
    """Return the phi denominator for ``date``.

    The value is the row count stored in ``max_ranks_dynamic`` for the
    calendar month of ``date``, plus one.

    Parameters
    ----------
    date : object with a ``month`` attribute
        The date whose month selects the count.

    Raises
    ------
    KeyError
        If the month of ``date`` is not a key of ``max_ranks_dynamic``.
    """
    return max_ranks_dynamic[date.month] + 1

# phi = long-term rank / phi denominator, for every row.
# The denominator depends only on the row's date, so it is computed once and
# shared by all three variables; the division is then done column-wise
# instead of invoking a Python lambda per row through DataFrame.apply(axis=1).
# A missing rank (NaN) propagates to a missing phi.
phi_denominators = pd.Series(
    [get_phi_denominator(day) for day in df.index],
    dtype='float64',
).to_numpy()  # plain array: positional division, no index alignment involved

for rank_col, phi_col in (('LRRHUM', 'PHIRHUM'), ('LRTEMP', 'PHITEMP'), ('LRWDSP', 'PHIWDSP')):
    df[phi_col] = df[rank_col] / phi_denominators

# Preview of the results
print(df[['LRRHUM', 'PHIRHUM', 'LRTEMP', 'PHITEMP', 'LRWDSP', 'PHIWDSP']].head(50))


            LRRHUM   PHIRHUM  LRTEMP   PHITEMP  LRWDSP   PHIWDSP
DATE                                                            
2002-01-01   342.0  0.500732    42.0  0.061493   470.0  0.688141
2002-01-02   208.0  0.304539    12.0  0.017570     1.0  0.001464
2002-01-03   117.0  0.171303    23.0  0.033675   173.0  0.253294
2002-01-04   329.0  0.481698    19.0  0.027818   167.0  0.244510
2002-01-05   177.0  0.259151    29.0  0.042460     1.0  0.001464
2002-01-06   293.0  0.428990    68.0  0.099561   136.0  0.199122
2002-01-07   260.0  0.380673    38.0  0.055637     1.0  0.001464
2002-01-08   299.0  0.437775    96.0  0.140556     1.0  0.001464
2002-01-09   363.0  0.531479   209.0  0.306003     1.0  0.001464
2002-01-10   402.0  0.588580    40.0  0.058565     1.0  0.001464
2002-01-11   491.0  0.718887    46.0  0.067350     1.0  0.001464
2002-01-12   293.0  0.428990    58.0  0.084919     1.0  0.001464
2002-01-13   491.0  0.718887    44.0  0.064422     1.0  0.001464
2002-01-14   385.0  0.563

In [5]:
# Number of rows for every (year, month) pair present in df,
# stored as a {(year, month): row_count} dictionary
year_month_keys = [df.index.year, df.index.month]
rows_per_year_month = df.groupby(year_month_keys).size()
f_denominators = rows_per_year_month.to_dict()

# Denominator of the F ratio
def get_F_denominator(date):
    """Return the F denominator for ``date``.

    The value is the row count stored in ``f_denominators`` for the
    ``(year, month)`` pair of ``date``, plus one.

    Parameters
    ----------
    date : object with ``year`` and ``month`` attributes
        The date whose year and month select the count.

    Raises
    ------
    KeyError
        If ``(date.year, date.month)`` is not a key of ``f_denominators``.
    """
    return f_denominators[(date.year, date.month)] + 1

# F = short-term rank / F denominator, for every row.
# The denominator depends only on the row's date, so it is computed once and
# shared by all three variables; the division is then done column-wise
# instead of invoking a Python lambda per row through DataFrame.apply(axis=1).
# A missing rank (NaN) propagates to a missing F.
F_denominators_per_row = pd.Series(
    [get_F_denominator(day) for day in df.index],
    dtype='float64',
).to_numpy()  # plain array: positional division, no index alignment involved

for rank_col, f_col in (('SRRHUM', 'FRHUM'), ('SRTEMP', 'FTEMP'), ('SRWDSP', 'FWDSP')):
    df[f_col] = df[rank_col] / F_denominators_per_row

# Preview of the results
print(df[['SRRHUM', 'FRHUM', 'SRTEMP', 'FTEMP', 'SRWDSP', 'FWDSP']].head(50))


            SRRHUM     FRHUM  SRTEMP     FTEMP  SRWDSP     FWDSP
DATE                                                            
2002-01-01    18.0  0.562500     7.0  0.218750    30.0  0.937500
2002-01-02     9.0  0.281250     1.0  0.031250     1.0  0.031250
2002-01-03     4.0  0.125000     3.0  0.093750    29.0  0.906250
2002-01-04    17.0  0.531250     2.0  0.062500    28.0  0.875000
2002-01-05     8.0  0.250000     4.0  0.125000     1.0  0.031250
2002-01-06    13.0  0.406250    12.0  0.375000    27.0  0.843750
2002-01-07    12.0  0.375000     5.0  0.156250     1.0  0.031250
2002-01-08    15.0  0.468750    14.0  0.437500     1.0  0.031250
2002-01-09    20.0  0.625000    18.0  0.562500     1.0  0.031250
2002-01-10    23.0  0.718750     6.0  0.187500     1.0  0.031250
2002-01-11    27.0  0.843750     9.0  0.281250     1.0  0.031250
2002-01-12    13.0  0.406250    11.0  0.343750     1.0  0.031250
2002-01-13    27.0  0.843750     8.0  0.250000     1.0  0.031250
2002-01-14    22.0  0.687

In [6]:
# Absolute difference |F - phi| per row, one column per variable
for f_col, phi_col, abs_col in (
    ('FRHUM', 'PHIRHUM', 'ABSRHUM'),
    ('FTEMP', 'PHITEMP', 'ABSTEMP'),
    ('FWDSP', 'PHIWDSP', 'ABSWDSP'),
):
    df[abs_col] = df[f_col].sub(df[phi_col]).abs()

# Preview of the results
preview_cols = ['FRHUM', 'PHIRHUM', 'ABSRHUM', 'FTEMP', 'PHITEMP', 'ABSTEMP', 'FWDSP', 'PHIWDSP', 'ABSWDSP']
print(df[preview_cols].head(50))


               FRHUM   PHIRHUM   ABSRHUM     FTEMP   PHITEMP   ABSTEMP  \
DATE                                                                     
2002-01-01  0.562500  0.500732  0.061768  0.218750  0.061493  0.157257   
2002-01-02  0.281250  0.304539  0.023289  0.031250  0.017570  0.013680   
2002-01-03  0.125000  0.171303  0.046303  0.093750  0.033675  0.060075   
2002-01-04  0.531250  0.481698  0.049552  0.062500  0.027818  0.034682   
2002-01-05  0.250000  0.259151  0.009151  0.125000  0.042460  0.082540   
2002-01-06  0.406250  0.428990  0.022740  0.375000  0.099561  0.275439   
2002-01-07  0.375000  0.380673  0.005673  0.156250  0.055637  0.100613   
2002-01-08  0.468750  0.437775  0.030975  0.437500  0.140556  0.296944   
2002-01-09  0.625000  0.531479  0.093521  0.562500  0.306003  0.256497   
2002-01-10  0.718750  0.588580  0.130170  0.187500  0.058565  0.128935   
2002-01-11  0.843750  0.718887  0.124863  0.281250  0.067350  0.213900   
2002-01-12  0.406250  0.428990  0.0227

In [7]:
# 'YYYY-MM' label of every row, stored in the PERIOD column
df['PERIOD'] = df.index.to_period('M').astype(str)

# One row per period: the absolute differences summed per variable,
# with the summed columns renamed from ABS* to FS*
abs_to_fs = {
    'ABSRHUM': 'FSRHUM',
    'ABSTEMP': 'FSTEMP',
    'ABSWDSP': 'FSWDSP'
}
fs_df = (
    df.groupby('PERIOD')[list(abs_to_fs)]
    .sum()
    .rename(columns=abs_to_fs)
    .reset_index()
)

# FSSUM: the FS statistics of RHUM and TEMP added together
fs_df['FSSUM'] = fs_df['FSRHUM'] + fs_df['FSTEMP']

# Preview of the results
print(fs_df.head(50))


     PERIOD     FSRHUM    FSTEMP     FSWDSP      FSSUM
0   2002-01   2.239751  5.587344   3.643302   7.827096
1   2002-02   2.642588  4.753188   7.133219   7.395776
2   2002-03   1.329658  3.133876   5.664257   4.463534
3   2002-04   4.611732  2.833683   7.336782   7.445415
4   2002-05   0.986182  2.084233   9.575448   3.070415
5   2002-06   2.644527  2.193890   8.968035   4.838417
6   2002-07   4.528825  0.917597   6.151629   5.446422
7   2002-08  10.192533  6.858163   8.029603  17.050695
8   2002-09   8.784149  4.496852   7.188131  13.281001
9   2002-10   3.382732  1.727306   6.207769   5.110038
10  2002-11   2.701967  2.808306   3.329022   5.510273
11  2002-12   3.075448  3.439239   5.483666   6.514687
12  2003-01   5.511255  3.427800   4.519079   8.939056
13  2003-02   1.369941  8.870108   5.910467  10.240049
14  2003-03   1.405152  5.828377   9.180774   7.233529
15  2003-04   2.293397  6.498268   5.424821   8.791665
16  2003-05   2.515282  3.938964   7.804310   6.454246
17  2003-0

In [8]:
# Helper MONTH column for grouping: characters 5-6 of the 'YYYY-MM' PERIOD label
fs_df['MONTH'] = fs_df['PERIOD'].str[5:7]

# For every calendar month keep the three periods with the smallest FSSUM.
# The frame is sorted by (MONTH, FSSUM) and the first three rows of each
# month are taken. This replaces groupby(...).apply(nsmallest), which relies
# on apply operating on the grouping column (deprecated in pandas, and the
# MONTH column is needed in the result). It also makes the final order
# explicit: months ascending ('01' to '12'), FSSUM ascending within a month.
top3_df = (
    fs_df.sort_values(by=['MONTH', 'FSSUM'])
    .groupby('MONTH')
    .head(3)
    .reset_index(drop=True)
)

# Preview of the results
print(top3_df)


     PERIOD    FSRHUM    FSTEMP    FSWDSP     FSSUM MONTH
0   2010-01  2.466325  1.639230  7.365758  4.105555    01
1   2007-01  1.805637  2.568860  4.785139  4.374497    01
2   2015-01  3.320415  1.461063  8.109627  4.781479    01
3   2023-02  1.320047  1.338009  4.154895  2.658055    02
4   2004-02  1.866345  1.124437  3.794855  2.990782    02
5   2021-02  1.218206  2.141535  3.557933  3.359741    02
6   2008-03  1.234032  1.191938  3.859627  2.425970    03
7   2004-03  2.182925  1.301016  6.794015  3.483940    03
8   2019-03  1.590318  2.171623  2.499817  3.761942    03
9   2011-04  1.187692  1.738812  1.909277  2.926504    04
10  2012-04  1.728076  1.438339  6.009272  3.166415    04
11  2008-04  1.118735  2.215704  2.257430  3.334440    04
12  2005-05  0.917597  1.929081  7.479228  2.846678    05
13  2017-05  1.382412  1.685487  5.938552  3.067899    05
14  2002-05  0.986182  2.084233  9.575448  3.070415    05
15  2015-06  1.839930  1.203114  7.185496  3.043043    06
16  2007-06  1

  top3_df = fs_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(3, 'FSSUM'))


In [9]:
# For every calendar month pick the single candidate with the smallest FSWDSP.
# The candidates are sorted by (MONTH, FSWDSP) and the first row of each month
# is taken. This replaces groupby(...).apply(nsmallest), which relies on apply
# operating on the grouping column (deprecated in pandas), and keeps the MONTH
# column in the result. Rows come out ordered by month ('01' to '12').
final_selection_df = (
    top3_df.sort_values(by=['MONTH', 'FSWDSP'])
    .groupby('MONTH')
    .head(1)
    .reset_index(drop=True)
)

# Preview of the results
print(final_selection_df)


     PERIOD    FSRHUM    FSTEMP    FSWDSP     FSSUM MONTH
0   2007-01  1.805637  2.568860  4.785139  4.374497    01
1   2021-02  1.218206  2.141535  3.557933  3.359741    02
2   2019-03  1.590318  2.171623  2.499817  3.761942    03
3   2011-04  1.187692  1.738812  1.909277  2.926504    04
4   2017-05  1.382412  1.685487  5.938552  3.067899    05
5   2008-06  1.308574  1.491289  5.152994  2.799863    06
6   2008-07  1.416224  2.280289  5.892981  3.696514    07
7   2011-08  2.239339  1.023197  4.482430  3.262537    08
8   2017-09  1.419794  1.079157  3.574447  2.498951    09
9   2020-10  2.340684  2.387765  3.535093  4.728450    10
10  2018-11  1.307452  1.308965  3.125323  2.616417    11
11  2011-12  2.795708  1.418192  3.375915  4.213900    12


  final_selection_df = top3_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(1, 'FSWDSP'))


In [10]:
# 'YYYY-MM' labels of the selected reference periods
reference_periods = final_selection_df['PERIOD'].to_list()

# Rows of df that fall into one of those periods, restricted to the exported columns
in_reference_period = df['PERIOD'].isin(reference_periods)
ry_df = df.loc[in_reference_period, ['RHUM', 'MAX', 'MIN', 'TEMP', 'WDSP']].copy()

# Re-date every row to the year 2024 (month and day unchanged) and use the
# result as the new index, again named DATE
ry_df.index = pd.DatetimeIndex(
    [day.replace(year=2024) for day in ry_df.index],
    name='DATE',
)

# Order the rows by date so the months run from January to December
ry_df = ry_df.sort_index()

# Write the result to CSV
ry_df.to_csv('rypv.csv')

# Preview of the first rows
print(ry_df.head(50))


             RHUM   MAX   MIN  TEMP  WDSP
DATE                                     
2024-01-01  0.879   4.8 -10.0  -4.2   0.6
2024-01-02  0.856   8.0   0.2   4.0   1.5
2024-01-03  0.877   3.1  -1.2   0.0   0.9
2024-01-04  0.832   7.0  -5.7  -0.4   0.0
2024-01-05  0.895   3.9  -5.7  -1.7   0.0
2024-01-06  0.903   4.1  -3.1  -0.3   0.0
2024-01-07  0.907  -0.6  -5.6  -4.1   0.0
2024-01-08  0.914   2.0  -5.8  -3.1   0.0
2024-01-09  0.914  -0.2  -4.3  -3.1   0.0
2024-01-10  0.921   1.2  -5.3  -3.6   0.0
2024-01-11  0.888   3.8  -5.0  -2.1   0.0
2024-01-12  0.923   7.1  -3.0  -0.3   0.0
2024-01-13  0.791  10.4  -4.7   0.6   0.3
2024-01-14  0.844   5.9  -4.6  -1.1   0.0
2024-01-15  0.921   0.3  -5.9  -3.7   0.0
2024-01-16  0.899   3.2  -9.5  -5.3   0.0
2024-01-17  0.895   3.4  -7.1  -1.8   0.0
2024-01-18  0.897   5.3  -1.2   1.2   0.0
2024-01-19  0.720  15.2   1.1   7.0   0.8
2024-01-20  0.811  14.6   2.5   6.1   0.0
2024-01-21  0.738  16.4  -1.2   5.3   0.4
2024-01-22  0.782  14.6  -1.0   5.