In [15]:
from pathlib import Path
import pandas as pd

### Files to read




In [2]:
# Input files, given as absolute paths on the project disk.
# NOTE(review): the two paths sit under different model versions
# (2021.2.0 vs 2023.0.1) - confirm that mixing them is intended.

# RFT dump; read further down as a semicolon-separated table.
RFT_DUMP = "/project/snorre/reservoirmodels/ff/2021.2.0/preprocessing/input/rft/RFT_dump_221220_ZoneAdjusted.txt"
# Alias table; read further down as whitespace-separated (rms name, eclipse name) pairs.
WELL_ALIAS_FILENAME = "/project/snorre/reservoirmodels/ff/2023.0.1/rms/input/well_modelling/well_info/well_alias_rms_ecl.txt"

### Read files


##### Read and massage rfts

In [3]:
# Column labels imposed on the dump. With header=0 the first row of the file
# is consumed as a header and replaced by these names.
RFT_COLS = [
    "well_name",
    "datatype",
    "date",
    "md",
    "tvd",
    "zone",
    "value",
    "quality",
    "something",
    "val_again",
    "X",
    "Y",
    "Classification",
]
RFTS = pd.read_csv(
    RFT_DUMP,
    sep=";",
    skipinitialspace=True,
    header=0,
    names=RFT_COLS,
)

# Trim surrounding whitespace from every text column. Columns without string
# data make the .str accessor raise AttributeError; those are reported and
# left untouched.
for col_name in RFTS.columns:
    try:
        stripped = RFTS[col_name].str.strip()
    except AttributeError:
        print(f"{col_name} is not string")
        continue
    RFTS[col_name] = stripped

# The quality flag is used as an integer key when errors are assigned later.
RFTS["quality"] = RFTS["quality"].astype(int)


md is not string
tvd is not string
value is not string
quality is not string
something is not string
val_again is not string
X is not string
Y is not string


In [4]:
# Alias table: two whitespace-separated columns (RMS name, Eclipse name), no
# header row; anything from a "#" to the end of the line is ignored.
# The separator is a raw string so that "\s" reaches the regex engine as-is
# instead of being treated as an (invalid) string escape sequence.
WELL_ALIASES = pd.read_csv(
    WELL_ALIAS_FILENAME,
    sep=r"\s+",
    comment="#",
    header=None,
    names=["rms", "eclipse"],
)


### Make dictionary for aliasing between well name and eclipse name

In [5]:
# Derive the well name as it is written in the RFT dump from the RMS alias,
# e.g. "RFT_34_4-12_A" -> "NO 34/4-12 A". The "RFT_" prefix is optional, and
# aliases that do not match the pattern are kept unchanged.
WELL_ALIASES["well_name"] = WELL_ALIASES["rms"].str.replace(
    r"(RFT_)?(\d{2})_(\d[^_]+)_(.*)",
    r"NO \2/\3 \4",
    regex=True,
)

# Lookup from well name to Eclipse name. If several aliases map to the same
# well name, the last one in the table wins.
RENAMER = WELL_ALIASES.set_index("well_name")["eclipse"].to_dict()



#### Merge aliases with rft

In [6]:
# Inner join on well_name (the pandas default): RFT rows without an alias are
# dropped, and an RFT row is repeated once per alias sharing its well name.
MERGED = RFTS.merge(WELL_ALIASES, how="inner", on="well_name")


#### Add error

In [7]:
# Observation error assigned per quality flag. Flags that are not listed here
# are passed through unchanged by replace().
errors = {
    1: 1,
    2: 2.5,
    3: 5,
    4: 10,
    -999: 10,
}
MERGED["error"] = MERGED["quality"].replace(to_replace=errors)
MERGED.head()

Unnamed: 0,well_name,datatype,date,md,tvd,zone,value,quality,something,val_again,X,Y,Classification,rms,eclipse,error
0,NO 34/4-12 A,Pressure,2010-02-07,2565.6,2456.942,SNML1_3,371.9,3,-999.25,371.9,462096.98,6824565.43,UNKNOWN,RFT_34_4-12_A,R_4_12A,5.0
1,NO 34/4-12 A,Pressure,2010-02-07,2572.6,2463.187,SNML1_3,373.23,3,-999.25,373.23,462093.85,6824565.0,UNKNOWN,RFT_34_4-12_A,R_4_12A,5.0
2,NO 34/4-12 A,Pressure,2010-02-07,2575.0,2465.328,SNML1_3,373.35,4,-999.25,373.35,462092.78,6824564.85,UNKNOWN,RFT_34_4-12_A,R_4_12A,10.0
3,NO 34/4-12 A,Pressure,2010-02-07,2580.8,2470.495,SNLL,373.7,2,-999.25,373.7,462090.17,6824564.48,UNKNOWN,RFT_34_4-12_A,R_4_12A,2.5
4,NO 34/4-12 A,Pressure,2010-02-07,2585.0,2474.235,SNLL,-999.25,1,-999.25,-999.25,462088.28,6824564.21,UNKNOWN,RFT_34_4-12_A,R_4_12A,1.0


### Add a combined field for well, date, and restart

In [12]:
# Restart number, kept as a string so it can be concatenated below.
MERGED["restart"] = "1"
# One space-separated "<eclipse name> <date> <restart>" entry per row. The
# separator before the restart number is required; without it the restart
# digit is glued onto the date (e.g. "2010-02-071").
MERGED["COMBINED_INFO"] = (
    MERGED["eclipse"] + " " + MERGED["date"] + " " + MERGED["restart"]
)

#### Some analysis 


In [11]:
# Quick textual overview of the merged table: each report is printed under its
# label, with a dashed rule between the two.
separator = "\n--------------------\n"
reports = [
    ("Datatypes: ", MERGED.dtypes),
    ("Description of string columns:", MERGED.describe(include=[object])),
    # describe() without arguments summarises the numeric columns.
    ("Description of float columns:", MERGED.describe()),
    ("Types of measurements:", MERGED.datatype.unique().tolist()),
    ("Fractions: ", MERGED.datatype.value_counts(normalize=True).round(2)),
]
for label, report in reports:
    print(label, report, sep=separator)


Datatypes: 
--------------------
well_name          object
datatype           object
date               object
md                float64
tvd               float64
zone               object
value             float64
quality             int64
something         float64
val_again         float64
X                 float64
Y                 float64
Classification     object
rms                object
eclipse            object
error             float64
dtype: object
Description of string columns:
--------------------
              well_name  datatype        date  zone Classification  \
count              3458      3458        3458  3458           3458   
unique              125         1         122    26              1   
top     NO 34/4-K-3 HT2  Pressure  2003-02-09   SN3        UNKNOWN   
freq                 98      3458          98   362           3458   

                     rms  eclipse  
count               3458     3458  
unique               230      230  
top     RFT_34_4-K-3_HT2  

### Create files for general observations

In [19]:
# Export directory, created below the current working directory.
out_dir = Path.cwd() / "test_rfts"
out_dir.mkdir(parents=True, exist_ok=True)
print(f"Exporting to {out_dir}")

# Index file: one line per unique "<eclipse name> <date> <restart>" entry.
well_info_name = out_dir / "well_name_time_restart.txt"
pd.Series(MERGED["COMBINED_INFO"].unique()).to_csv(well_info_name, index=False, header=False)

# Write one pair of files per Eclipse name. Grouping on "eclipse" rather than
# on "well_name" matters because several aliases can share a well name:
# grouping on the well name would put the rows of all its aliases into the
# files of the first alias only, while the index file above lists every alias.
# sort=False keeps the groups in order of first appearance.
# NOTE(review): rows whose value is -999.25 are exported as-is; this looks
# like a missing-value marker - confirm whether such rows should be filtered.
for ecl_name, sub_set in MERGED.groupby("eclipse", sort=False):
    observations = sub_set[["value", "error"]]
    spatials = sub_set[["md", "tvd", "X", "Y", "zone"]]

    # <eclipse name>.obs: observed value and its error, space separated.
    obs_file_name = out_dir / f"{ecl_name}.obs"
    observations.to_csv(obs_file_name, sep=" ", index=False, header=False)

    # <eclipse name>.txt: position (md, tvd, X, Y) and zone of each point.
    space_file_name = out_dir / f"{ecl_name}.txt"
    spatials.to_csv(space_file_name, sep=" ", index=False, header=False)





Exporting to /private/dbs/git/subscript/src/subscript/fmuobs/test_rfts
