In [44]:
import pandas as pd
from datetime import datetime, timedelta
from dataclasses import dataclass
from typing import List, Optional
import requests
import re

In [None]:
@dataclass
class ProviderFormat:
    """Holds the parsing configuration for a specific electricity provider's CSV format. """
    name: str
    usage_col: str
    timestamp_col: str
    time_sub_col: Optional[str] = None
    date_format: Optional[str] = "%d.%m.%Y %H:%M"
    other_cols: Optional[List[str]] = None
    should_skip_func: Optional[str] = None
    fixup_timestamp: bool = False
    separator: str = ";"
    decimal: str = ","
    skiprows: int = 0
    encoding: str = 'utf-8-sig'
    feedin: bool = False
    end_timestamp_col: Optional[str] = None
    preprocess_date_func: Optional[str] = None

In [46]:
class JavaScriptNetzbetreiberParser:
    """
    Parses the JavaScript netzbetreiber.js file to extract provider configurations.
    """
    
    def __init__(self):
        self.date_format_map = {
            "dd.MM.yyyy HH:mm": "%d.%m.%Y %H:%M",
            "dd.MM.yyyy HH:mm:ss": "%d.%m.%Y %H:%M:%S",
            "dd.MM.yy HH:mm": "%d.%m.%y %H:%M",
            "dd.MM.yy HH:mm:ss": "%d.%m.%y %H:%M:%S",
            "yyyy-MM-dd HH:mm:ss": "%Y-%m-%d %H:%M:%S",
            " dd.MM.yyyy HH:mm:ss": " %d.%m.%Y %H:%M:%S",
            "parseISO": "ISO8601"
        }
    
    def parse_js_file(self, js_content: str) -> List[ProviderFormat]:
        """
        Parse JavaScript content and extract Netzbetreiber configurations.
        """
        providers = []
        
        # Pattern to match export const declarations
        pattern = r'export const (\w+) = new Netzbetreiber\((.*?)\);'
        matches = re.findall(pattern, js_content, re.DOTALL)
        
        for var_name, params_str in matches:
            try:
                provider = self._parse_netzbetreiber_params(var_name, params_str)
                if provider:
                    providers.append(provider)
            except Exception as e:
                print(f"Error parsing {var_name}: {e}")
                continue
        
        return providers
    
    def _parse_netzbetreiber_params(self, var_name: str, params_str: str) -> Optional[ProviderFormat]:
        """
        Parse individual Netzbetreiber constructor parameters.
        """
        # Clean up the parameters string
        params_str = params_str.strip()
        
        # Split parameters - this is complex due to nested functions and arrays
        params = self._split_parameters(params_str)
        
        if len(params) < 6:
            return None
        
        # Extract basic parameters
        name = self._clean_string(params[0])
        usage_col = self._clean_string(params[1])
        timestamp_col = self._clean_string(params[2])
        time_sub_col = self._clean_string(params[3]) if params[3] != 'null' else None
        date_format_js = self._clean_string(params[4])
        
        # Convert date format
        date_format = self.date_format_map.get(date_format_js, date_format_js)
        
        # Extract other fields (array)
        other_cols = []
        if len(params) > 6 and params[6] != 'null':
            other_cols = self._parse_array(params[6])
        
        # Extract should_skip function
        should_skip_func = None
        if len(params) > 7 and params[7] != 'null':
            should_skip_func = params[7]
        
        # Extract fixup_timestamp
        fixup_timestamp = False
        if len(params) > 8:
            fixup_timestamp = params[8].strip().lower() == 'true'
        
        # Extract feedin flag
        feedin = False
        if len(params) > 9:
            feedin = params[9].strip().lower() == 'true'
        
        # Extract end timestamp descriptor
        end_timestamp_col = None
        if len(params) > 10 and params[10] != 'null':
            end_timestamp_col = self._clean_string(params[10])
        
        # Extract preprocess date function
        preprocess_date_func = None
        if len(params) > 11 and params[11] != 'null':
            preprocess_date_func = params[11]
        
        return ProviderFormat(
            name=name,
            usage_col=usage_col,
            timestamp_col=timestamp_col,
            time_sub_col=time_sub_col,
            date_format=date_format,
            other_cols=other_cols,
            should_skip_func=should_skip_func,
            fixup_timestamp=fixup_timestamp,
            feedin=feedin,
            end_timestamp_col=end_timestamp_col,
            preprocess_date_func=preprocess_date_func
        )
    
    def _split_parameters(self, params_str: str) -> List[str]:
        """
        Split parameters while respecting nested structures.
        """
        params = []
        current_param = ""
        paren_depth = 0
        bracket_depth = 0
        in_string = False
        string_char = None

        i = 0
        while i < len(params_str):
            char = params_str[i]
            
            if char == '\\': # Escape character
                    i += 1
                    continue
                
            if not in_string:
                if char in ['"', "'"]: # Within apostrophes
                    in_string = True
                    string_char = char
                    i += 1
                    continue
                elif char == '(': # Within parentheses
                    paren_depth += 1
                elif char == ')':
                    paren_depth -= 1
                elif char == '[': # Within brackets
                    bracket_depth += 1
                elif char == ']':
                    bracket_depth -= 1
                elif char == ',' and paren_depth == 0 and bracket_depth == 0:
                    params.append(current_param.strip())
                    current_param = ""
                    i += 1
                    continue
            else:
                if char == string_char: # and (i == 0 or params_str[i-1] != '\\')
                    in_string = False
                    string_char = None
                    i += 1
                    continue
            
            current_param += char
            i += 1

        if current_param.strip():
            params.append(current_param.strip())
    
        return params
    
    def _clean_string(self, s: str) -> str:
        """
        Clean a string parameter by removing quotes.
        """
        s = s.strip()
        #if s.startswith('"') and s.endswith('"'):
        #    return s[1:-1]
        #if s.startswith("'") and s.endswith("'"):
        #    return s[1:-1]
        return s
    
    def _parse_array(self, array_str: str) -> List[str]:
        """
        Parse a JavaScript array string.
        """
        array_str = array_str.strip()
        if not (array_str.startswith('[') and array_str.endswith(']')):
            return []
        
        content = array_str[1:-1].strip()
        if not content:
            return []
        
        items = []
        current_item = ""
        in_string = False
        string_char = None
        
        for char in content:
            if not in_string:
                if char in ['"', "'"]:
                    in_string = True
                    string_char = char
                elif char == ',':
                    if current_item.strip():
                        items.append(self._clean_string(current_item.strip()))
                    current_item = ""
                    continue
            else:
                if char == string_char:
                    in_string = False
                    string_char = None
            
            current_item += char
        
        if current_item.strip():
            items.append(self._clean_string(current_item.strip()))
        
        return items

In [47]:
js_url="https://github.com/awattar-backtesting/awattar-backtesting.github.io/blob/main/docs/netzbetreiber.js"

In [48]:
js_parser = JavaScriptNetzbetreiberParser()

In [49]:
try:
    response = requests.get(js_url)
    response.raise_for_status()
    provider_formats = js_parser.parse_js_file(response.text)
except Exception as e:
    print(f"Error loading from URL {js_url}: {e}")

## Alternative Bug Fixing

In [50]:
js_content = response.text

In [51]:
self = js_parser

In [52]:
providers = []
        
# Pattern to match export const declarations
pattern = r'export const (\w+) = new Netzbetreiber\((.*?)\);'
matches = re.findall(pattern, js_content, re.DOTALL)

for var_name, params_str in matches:
    try:
        provider = self._parse_netzbetreiber_params(var_name, params_str)
        if provider:
            providers.append(provider)
    except Exception as e:
        print(f"Error parsing {var_name}: {e}")
        continue



In [53]:
# Clean up the parameters string
params_str = params_str.strip()

# Split parameters - this is complex due to nested functions and arrays
params = self._split_parameters(params_str)


In [54]:
params

['eww Wels V2',
 '!Restnetzbezug',
 'BeginDate',
 'null',
 'yyyy-MM-dd HH:mm:ss',
 '(function (usage) {,    return parseFloat(usage.replace(,, .)']

In [55]:
if len(params) < 6:
    pass
    #return None


In [56]:

# Extract basic parameters
name = self._clean_string(params[0])
usage_col = self._clean_string(params[1])
timestamp_col = self._clean_string(params[2])
time_sub_col = self._clean_string(params[3]) if params[3] != 'null' else None
date_format_js = self._clean_string(params[4])

# Convert date format
date_format = self.date_format_map.get(date_format_js, date_format_js)

# Extract other fields (array)
other_cols = []
if len(params) > 6 and params[6] != 'null':
    other_cols = self._parse_array(params[6])

# Extract should_skip function
should_skip_func = None
if len(params) > 7 and params[7] != 'null':
    should_skip_func = params[7]

# Extract fixup_timestamp
fixup_timestamp = False
if len(params) > 8:
    fixup_timestamp = params[8].strip().lower() == 'true'

# Extract feedin flag
feedin = False
if len(params) > 9:
    feedin = params[9].strip().lower() == 'true'

# Extract end timestamp descriptor
end_timestamp_col = None
if len(params) > 10 and params[10] != 'null':
    end_timestamp_col = self._clean_string(params[10])

# Extract preprocess date function
preprocess_date_func = None
if len(params) > 11 and params[11] != 'null':
    preprocess_date_func = params[11]

p = ProviderFormat(
    name=name,
    usage_col=usage_col,
    timestamp_col=timestamp_col,
    time_sub_col=time_sub_col,
    date_format=date_format,
    other_cols=other_cols,
    should_skip_func=should_skip_func,
    fixup_timestamp=fixup_timestamp,
    feedin=feedin,
    end_timestamp_col=end_timestamp_col,
    preprocess_date_func=preprocess_date_func
)

In [57]:
p

ProviderFormat(name='eww Wels V2', usage_col='!Restnetzbezug', timestamp_col='BeginDate', time_sub_col=None, date_format='%Y-%m-%d %H:%M:%S', other_cols=[], should_skip_func=None, fixup_timestamp=False, separator=';', decimal=',', skiprows=0, encoding='utf-8-sig', feedin=False, end_timestamp_col=None, preprocess_date_func=None)