In [1]:
import pandas as pd
file_path = "/home/marko/projects/tf216/pg/pgfinal.csv"

In [2]:
df = pd.read_csv(file_path, parse_dates=['DATE'], index_col='DATE')

# Dodavanje kolona za mjesec i dan radi lakšeg grupisanja
df['MONTH'] = df.index.month
df['DAY'] = df.index.day

# Kreiranje maskiranog DataFrame-a za period 2002-2023
df = df.loc['2002-01-01':'2023-12-31']

# Funkcija za računanje rangova po mjesecu
def calculate_monthly_ranks(var_name, rank_col_name):
    ranks = []

    for month in range(1, 13):
        # Filter podataka za trenutni mjesec
        month_data = df[df['MONTH'] == month]

        # Dobijanje rastućih rangova za sve dane tog mjeseca
        month_ranks = month_data[var_name].rank(method='min').astype(int)

        # Upis rangova na odgovarajuće pozicije u glavnom DataFrame-u
        df.loc[month_data.index, rank_col_name] = month_ranks

# Računanje rangova za RHUM i TEMP
calculate_monthly_ranks('RHUM', 'LRRHUM')
calculate_monthly_ranks('TEMP', 'LRTEMP')
calculate_monthly_ranks('WDSP', 'LRWDSP')


# Pregled rezultata
print(df[['RHUM', 'LRRHUM', 'TEMP', 'LRTEMP', 'WDSP', 'LRWDSP']].head(20))

             RHUM  LRRHUM  TEMP  LRTEMP  WDSP  LRWDSP
DATE                                                 
2002-01-01  0.358    39.0   1.8    56.0   6.6   670.0
2002-01-02  0.360    42.0   0.6    25.0   2.0   467.0
2002-01-03  0.500   161.0   0.1    19.0   3.0   564.0
2002-01-04  0.331    25.0  -1.4    11.0   5.5   656.0
2002-01-05  0.287     6.0   0.8    29.0   5.0   649.0
2002-01-06  0.323    22.0   5.6   240.0   4.2   618.0
2002-01-07  0.490   148.0   4.4   175.0   3.2   577.0
2002-01-08  0.540   198.0   4.3   169.0   2.2   494.0
2002-01-09  0.590   238.0   4.2   162.0   1.2   242.0
2002-01-10  0.572   220.0   5.5   236.0   0.3    55.0
2002-01-11  0.659   311.0   4.0   154.0   0.3    55.0
2002-01-12  0.650   304.0   4.1   158.0   0.4    82.0
2002-01-13  0.692   346.0   2.9    96.0   0.3    55.0
2002-01-14  0.686   341.0   2.7    88.0   0.6   112.0
2002-01-15  0.710   366.0   4.1   158.0   0.2    35.0
2002-01-16  0.863   528.0   5.2   220.0   1.0   169.0
2002-01-17  0.926   612.0   

In [4]:
# Funkcija za računanje kratkoročnih rangova po mjesecu i godini
def calculate_short_term_ranks(var_name, rank_col_name):
    ranks = []

    # Grupisanje po godini i mjesecu
    grouped = df.groupby([df.index.year, df.index.month])

    for (year, month), group in grouped:
        # Dobijanje rastućih rangova za sve dane u tom mjesecu i godini
        group_ranks = group[var_name].rank(method='min').astype(int)

        # Upis rangova na odgovarajuće pozicije u glavnom DataFrame-u
        df.loc[group.index, rank_col_name] = group_ranks

# Računanje kratkoročnih rangova za RHUM, TEMP i WDSP
calculate_short_term_ranks('RHUM', 'SRRHUM')
calculate_short_term_ranks('TEMP', 'SRTEMP')
calculate_short_term_ranks('WDSP', 'SRWDSP')

# Pregled rezultata
print(df[['RHUM', 'LRRHUM', 'SRRHUM', 'TEMP', 'LRTEMP', 'SRTEMP', 'WDSP', 'LRWDSP', 'SRWDSP']].head(31))

             RHUM  LRRHUM  SRRHUM  TEMP  LRTEMP  SRTEMP  WDSP  LRWDSP  SRWDSP
DATE                                                                         
2002-01-01  0.358    39.0     4.0   1.8    56.0     5.0   6.6   670.0    31.0
2002-01-02  0.360    42.0     5.0   0.6    25.0     3.0   2.0   467.0    24.0
2002-01-03  0.500   161.0     7.0   0.1    19.0     2.0   3.0   564.0    26.0
2002-01-04  0.331    25.0     3.0  -1.4    11.0     1.0   5.5   656.0    30.0
2002-01-05  0.287     6.0     1.0   0.8    29.0     4.0   5.0   649.0    29.0
2002-01-06  0.323    22.0     2.0   5.6   240.0    17.0   4.2   618.0    28.0
2002-01-07  0.490   148.0     6.0   4.4   175.0    13.0   3.2   577.0    27.0
2002-01-08  0.540   198.0     8.0   4.3   169.0    12.0   2.2   494.0    25.0
2002-01-09  0.590   238.0    10.0   4.2   162.0    11.0   1.2   242.0    23.0
2002-01-10  0.572   220.0     9.0   5.5   236.0    16.0   0.3    55.0     5.0
2002-01-11  0.659   311.0    14.0   4.0   154.0     8.0   0.3   

In [6]:
# Filtriranje dugoročnog perioda
long_term_df = df.loc['2002-01-01':'2023-12-31']

# Računanje broja dana po mjesecu u dugoročnom periodu
max_ranks_dynamic = long_term_df.groupby(long_term_df.index.month).size().to_dict()

# Funkcija za određivanje nazivnika u phi računu, bez fiksnih vrijednosti
def get_phi_denominator(date):
    month = date.month
    return max_ranks_dynamic[month] + 1

# Računanje phi vrijednosti za svaki dan
df['PHIRHUM'] = df.apply(lambda row: row['LRRHUM'] / get_phi_denominator(row.name) if pd.notnull(row['LRRHUM']) else None, axis=1)
df['PHITEMP'] = df.apply(lambda row: row['LRTEMP'] / get_phi_denominator(row.name) if pd.notnull(row['LRTEMP']) else None, axis=1)
df['PHIWDSP'] = df.apply(lambda row: row['LRWDSP'] / get_phi_denominator(row.name) if pd.notnull(row['LRWDSP']) else None, axis=1)

# Pregled rezultata
print(df[['LRRHUM', 'PHIRHUM', 'LRTEMP', 'PHITEMP', 'LRWDSP', 'PHIWDSP']].head(50))


            LRRHUM   PHIRHUM  LRTEMP   PHITEMP  LRWDSP   PHIWDSP
DATE                                                            
2002-01-01    39.0  0.057101    56.0  0.081991   670.0  0.980966
2002-01-02    42.0  0.061493    25.0  0.036603   467.0  0.683748
2002-01-03   161.0  0.235725    19.0  0.027818   564.0  0.825769
2002-01-04    25.0  0.036603    11.0  0.016105   656.0  0.960469
2002-01-05     6.0  0.008785    29.0  0.042460   649.0  0.950220
2002-01-06    22.0  0.032211   240.0  0.351391   618.0  0.904832
2002-01-07   148.0  0.216691   175.0  0.256223   577.0  0.844802
2002-01-08   198.0  0.289898   169.0  0.247438   494.0  0.723280
2002-01-09   238.0  0.348463   162.0  0.237189   242.0  0.354319
2002-01-10   220.0  0.322108   236.0  0.345534    55.0  0.080527
2002-01-11   311.0  0.455344   154.0  0.225476    55.0  0.080527
2002-01-12   304.0  0.445095   158.0  0.231332    82.0  0.120059
2002-01-13   346.0  0.506589    96.0  0.140556    55.0  0.080527
2002-01-14   341.0  0.499

In [8]:
# Računanje broja dana po godini i mjesecu u skupu podataka
f_denominators = df.groupby([df.index.year, df.index.month]).size().to_dict()

# Funkcija za određivanje nazivnika u F računu
def get_F_denominator(date):
    year = date.year
    month = date.month
    return f_denominators[(year, month)] + 1

# Računanje F vrijednosti za svaki dan
df['FRHUM'] = df.apply(lambda row: row['SRRHUM'] / get_F_denominator(row.name) if pd.notnull(row['SRRHUM']) else None, axis=1)
df['FTEMP'] = df.apply(lambda row: row['SRTEMP'] / get_F_denominator(row.name) if pd.notnull(row['SRTEMP']) else None, axis=1)
df['FWDSP'] = df.apply(lambda row: row['SRWDSP'] / get_F_denominator(row.name) if pd.notnull(row['SRWDSP']) else None, axis=1)

# Pregled rezultata
print(df[['SRRHUM', 'FRHUM', 'SRTEMP', 'FTEMP', 'SRWDSP', 'FWDSP']].head(50))


            SRRHUM     FRHUM  SRTEMP     FTEMP  SRWDSP     FWDSP
DATE                                                            
2002-01-01     4.0  0.125000     5.0  0.156250    31.0  0.968750
2002-01-02     5.0  0.156250     3.0  0.093750    24.0  0.750000
2002-01-03     7.0  0.218750     2.0  0.062500    26.0  0.812500
2002-01-04     3.0  0.093750     1.0  0.031250    30.0  0.937500
2002-01-05     1.0  0.031250     4.0  0.125000    29.0  0.906250
2002-01-06     2.0  0.062500    17.0  0.531250    28.0  0.875000
2002-01-07     6.0  0.187500    13.0  0.406250    27.0  0.843750
2002-01-08     8.0  0.250000    12.0  0.375000    25.0  0.781250
2002-01-09    10.0  0.312500    11.0  0.343750    23.0  0.718750
2002-01-10     9.0  0.281250    16.0  0.500000     5.0  0.156250
2002-01-11    14.0  0.437500     8.0  0.250000     5.0  0.156250
2002-01-12    13.0  0.406250     9.0  0.281250    10.0  0.312500
2002-01-13    18.0  0.562500     7.0  0.218750     5.0  0.156250
2002-01-14    16.0  0.500

In [10]:
# Računanje apsolutnih vrijednosti razlika između F i phi za svaki parametar
df['ABSRHUM'] = (df['FRHUM'] - df['PHIRHUM']).abs()
df['ABSTEMP'] = (df['FTEMP'] - df['PHITEMP']).abs()
df['ABSWDSP'] = (df['FWDSP'] - df['PHIWDSP']).abs()

# Pregled rezultata
print(df[['FRHUM', 'PHIRHUM', 'ABSRHUM', 'FTEMP', 'PHITEMP', 'ABSTEMP', 'FWDSP', 'PHIWDSP', 'ABSWDSP']].head(50))


               FRHUM   PHIRHUM   ABSRHUM     FTEMP   PHITEMP   ABSTEMP  \
DATE                                                                     
2002-01-01  0.125000  0.057101  0.067899  0.156250  0.081991  0.074259   
2002-01-02  0.156250  0.061493  0.094757  0.093750  0.036603  0.057147   
2002-01-03  0.218750  0.235725  0.016975  0.062500  0.027818  0.034682   
2002-01-04  0.093750  0.036603  0.057147  0.031250  0.016105  0.015145   
2002-01-05  0.031250  0.008785  0.022465  0.125000  0.042460  0.082540   
2002-01-06  0.062500  0.032211  0.030289  0.531250  0.351391  0.179859   
2002-01-07  0.187500  0.216691  0.029191  0.406250  0.256223  0.150027   
2002-01-08  0.250000  0.289898  0.039898  0.375000  0.247438  0.127562   
2002-01-09  0.312500  0.348463  0.035963  0.343750  0.237189  0.106561   
2002-01-10  0.281250  0.322108  0.040858  0.500000  0.345534  0.154466   
2002-01-11  0.437500  0.455344  0.017844  0.250000  0.225476  0.024524   
2002-01-12  0.406250  0.445095  0.0388

In [12]:
# Kreiranje kolone PERIOD u formatu 'YYYY-MM'
df['PERIOD'] = df.index.to_period('M').astype(str)

# Grupisanje po periodi i sumiranje apsolutnih razlika
fs_df = df.groupby('PERIOD').agg({
    'ABSRHUM': 'sum',
    'ABSTEMP': 'sum',
    'ABSWDSP': 'sum'
}).reset_index()

# Preimenovanje kolona
fs_df = fs_df.rename(columns={
    'ABSRHUM': 'FSRHUM',
    'ABSTEMP': 'FSTEMP',
    'ABSWDSP': 'FSWDSP'
})

# Dodavanje kolone FSSUM kao zbira FS statistika za RHUM i TEMP
fs_df['FSSUM'] = fs_df['FSRHUM'] + fs_df['FSTEMP']

# Pregled rezultata
print(fs_df.head(50))


     PERIOD     FSRHUM     FSTEMP    FSWDSP      FSSUM
0   2002-01   2.207357   2.718109  5.116169   4.925467
1   2002-02   2.429371   5.085542  6.884355   7.514913
2   2002-03   2.625824   3.826089  2.879575   6.451913
3   2002-04   1.720072   2.002342  2.353375   3.722415
4   2002-05   1.850842   2.950174  8.016014   4.801016
5   2002-06   6.832219   5.902201  4.173979  12.734420
6   2002-07  11.926107  10.383190  7.064925  22.309297
7   2002-08  11.497575  10.896596  5.306094  22.394171
8   2002-09   5.166415   6.453467  1.730125  11.619882
9   2002-10   4.103770   5.720489  5.308428   9.824259
10  2002-11   1.353326   1.588161  5.033039   2.941487
11  2002-12   4.965822   1.137582  6.241902   6.103404
12  2003-01   2.557787   3.237326  4.121431   5.795113
13  2003-02   6.437299   5.169420  2.507152  11.606719
14  2003-03   7.772282   2.663388  3.264870  10.435670
15  2003-04   3.760773   3.958665  2.798692   7.719438
16  2003-05   5.045617   6.130124  4.543878  11.175741
17  2003-0

In [13]:
# Dodavanje pomoćne kolone MONTH za grupisanje (iz PERIOD)
fs_df['MONTH'] = fs_df['PERIOD'].str[5:7]

# Odabir po tri mjeseca sa najmanjim FSSUM vrijednostima za svaki mjesec
top3_df = fs_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(3, 'FSSUM'))

# Slaganje po mjesecu (od 01 do 12)
top3_df = top3_df.sort_values(by='MONTH').reset_index(drop=True)

# Pregled rezultata
print(top3_df)


     PERIOD    FSRHUM    FSTEMP     FSWDSP     FSSUM MONTH
0   2015-01  1.502471  1.237875   4.089449  2.740346    01
1   2013-01  1.042185  2.067487   2.310533  3.109672    01
2   2016-01  1.251556  1.905015   3.595123  3.156570    01
3   2008-02  1.517149  0.808896   5.681565  2.326045    02
4   2004-02  1.082422  1.789496   3.949089  2.871919    02
5   2007-02  1.745648  1.630059   3.624737  3.375707    02
6   2011-03  1.331259  1.539440   1.442030  2.870699    03
7   2020-03  1.570827  2.204932   2.915996  3.775760    03
8   2005-03  0.856515  2.953056   5.094208  3.809572    03
9   2019-04  0.934752  1.790396   4.564053  2.725148    04
10  2010-04  2.146259  1.379288   1.446293  3.525548    04
11  2002-04  1.720072  2.002342   2.353375  3.722415    04
12  2013-05  1.102398  1.584187   2.241078  2.686585    05
13  2012-05  1.680774  1.196788   2.313232  2.877562    05
14  2011-05  1.443402  1.537290   2.225933  2.980692    05
15  2007-06  1.996584  1.473671   8.535015  3.470255    

  top3_df = fs_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(3, 'FSSUM'))


In [14]:
# Odabir po jednog predstavnika za svaki mjesec sa minimalnom vrijednošću FSWDSP
final_selection_df = top3_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(1, 'FSWDSP'))

# Slaganje po mjesecu (od 01 do 12)
final_selection_df = final_selection_df.sort_values(by='MONTH').reset_index(drop=True)

# Pregled rezultata
print(final_selection_df)


     PERIOD    FSRHUM    FSTEMP    FSWDSP     FSSUM MONTH
0   2013-01  1.042185  2.067487  2.310533  3.109672    01
1   2007-02  1.745648  1.630059  3.624737  3.375707    02
2   2011-03  1.331259  1.539440  1.442030  2.870699    03
3   2010-04  2.146259  1.379288  1.446293  3.525548    04
4   2011-05  1.443402  1.537290  2.225933  2.980692    05
5   2013-06  0.725245  1.917671  1.960958  2.642916    06
6   2019-07  1.595351  1.308474  1.889870  2.903825    07
7   2007-08  1.686493  0.946468  4.832083  2.632961    08
8   2004-09  1.957689  1.348250  1.969011  3.305939    09
9   2013-10  2.671578  1.883144  2.941526  4.554722    10
10  2002-11  1.353326  1.588161  5.033039  2.941487    11
11  2011-12  1.752608  0.830115  1.414714  2.582723    12


  final_selection_df = top3_df.groupby('MONTH', group_keys=False).apply(lambda x: x.nsmallest(1, 'FSWDSP'))


In [17]:
# Lista referentnih mjeseci (perioda) koje ćemo koristiti za izbor podataka
reference_periods = final_selection_df['PERIOD'].tolist()

# Filtriranje dnevnih zapisa iz df koji pripadaju ovim periodama
ry_df = df[df['PERIOD'].isin(reference_periods)][['RHUM', 'MAX', 'MIN', 'TEMP', 'WDSP']].copy()

# Dodavanje DATE kolone iz indeksa (pošto je indeks datum)
ry_df['DATE'] = ry_df.index

# Zamjena godine u datumima na 2024
ry_df['DATE'] = ry_df['DATE'].apply(lambda d: d.replace(year=2024))

# Postavljanje DATE kao indeksa
ry_df = ry_df.set_index('DATE')

# Sortiranje redova po indeksu (kako bi mjeseci bili hronološki od januara do decembra)
ry_df = ry_df.sort_index()

# Eksportovanje rezultata u CSV
ry_df.to_csv('rypg.csv')

# Pregled prvih par redova
print(ry_df.head(50))


             RHUM   MAX  MIN  TEMP  WDSP
DATE                                    
2024-01-01  0.686  12.6 -0.2   5.0   0.8
2024-01-02  0.753  14.2  2.1   7.8   0.9
2024-01-03  0.709  15.7  5.0   9.0   1.1
2024-01-04  0.612  16.6  3.0   9.2   1.4
2024-01-05  0.573  14.7  5.9  10.7   1.6
2024-01-06  0.331  12.1  6.2   9.1   3.9
2024-01-07  0.440  11.5  2.0   5.5   3.1
2024-01-08  0.319   7.2  1.2   4.1   3.7
2024-01-09  0.519   8.4  0.0   3.8   0.9
2024-01-10  0.800   5.6 -1.7   2.4   0.8
2024-01-11  0.933   9.2  3.4   6.2   1.2
2024-01-12  0.372  12.9  5.2   8.5   4.1
2024-01-13  0.767   4.7  1.3   3.4   1.2
2024-01-14  0.811  13.8  4.8  10.1   3.2
2024-01-15  0.867  13.4  6.8   8.8   3.6
2024-01-16  0.896  12.0  6.2   8.1   2.5
2024-01-17  0.959   6.7  4.8   5.7   1.3
2024-01-18  0.920   7.6  5.3   6.3   1.5
2024-01-19  0.804   7.3  2.0   5.5   1.4
2024-01-20  0.980  10.0  5.3   7.7   1.0
2024-01-21  0.892  13.3  8.3  10.3   1.6
2024-01-22  0.922  11.9  7.2   9.3   1.5
2024-01-23  0.85