In [13]:
import pandas as pd
import requests
import struct
from io import BytesIO

# Byte order and signedness for each binary integer DATA_TYPE understood by
# _decode_field. Any other DATA_TYPE is treated as text.
_INTEGER_BYTE_LAYOUTS = {
    'LSB_INTEGER': ('little', True),
    'LSB_UNSIGNED_INTEGER': ('little', False),
    'MSB_INTEGER': ('big', True),
    'MSB_UNSIGNED_INTEGER': ('big', False),
}


def _decode_field(raw, column):
    """Convert the raw bytes of one field to a Python value, or None if missing."""
    layout = _INTEGER_BYTE_LAYOUTS.get(column.get('DATA_TYPE'))
    if layout is not None:
        byteorder, signed = layout
        value = int.from_bytes(raw, byteorder=byteorder, signed=signed)
    else:
        value = raw.decode('utf-8').strip()
    # Columns without a MISSING_CONSTANT yield None from .get(), which never
    # equals a decoded int or str, so their values pass through unchanged.
    if value == column.get('MISSING_CONSTANT'):
        return None
    return value


def read_dat_file_from_url(url, columns_info, timeout=30):
    """
    Reads a .dat file from the given URL based on the provided columns information and returns a Pandas DataFrame.

    The file is treated as a sequence of fixed-length binary records whose
    layout is the columns laid end to end, each 'BYTES' wide, in the order
    they appear in columns_info.

    Parameters:
        url (str): The URL of the .dat file.
        columns_info (list): List of dictionaries containing column information.
            Each dictionary needs 'NAME' and an integer 'BYTES'; 'DATA_TYPE'
            selects integer decoding (LSB_/MSB_ signed or unsigned), anything
            else is decoded as UTF-8 text; 'MISSING_CONSTANT' is optional.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        pd.DataFrame: DataFrame containing the data from the .dat file, or
        None if the download fails or the data does not match the layout
        (the reason is printed).
    """
    try:
        # Validate the layout before downloading so a bad column list fails
        # fast and with a clear message instead of a ZeroDivisionError or
        # struct.error further down.
        widths = [column.get('BYTES') for column in columns_info]
        if not widths:
            raise ValueError("No column definitions supplied; cannot determine the record layout.")
        if any(not isinstance(width, int) or width < 0 for width in widths):
            raise ValueError("Every column definition needs a non-negative integer 'BYTES' value.")

        record_struct = struct.Struct(''.join(f"{width}s" for width in widths))
        if record_struct.size == 0:
            raise ValueError("Column definitions describe a zero-length record.")

        # A timeout keeps a stalled server from hanging the call forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        payload = response.content

        # Validate record length
        if len(payload) % record_struct.size != 0:
            raise ValueError("File size is not a multiple of the record length. Check the file and format.")

        # iter_unpack walks the buffer one whole record at a time; the size
        # check above guarantees there is no trailing partial record.
        records = [
            [_decode_field(raw, column) for raw, column in zip(fields, columns_info)]
            for fields in record_struct.iter_unpack(payload)
        ]

        return pd.DataFrame(records, columns=[col['NAME'] for col in columns_info])

    except requests.exceptions.RequestException as e:
        print(f"Error fetching the .dat file: {e}")
        return None
    except ValueError as e:
        # Also covers UnicodeDecodeError (a ValueError subclass) raised when a
        # text field is not valid UTF-8.
        print(f"ValueError: {e}")
        return None

def _parse_pds_number(text):
    """Return text as an int if possible, else a float, else the text unchanged."""
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return text


def parse_fmt_file(fmt_content):
    """
    Parses the content of LOLARDR.FMT file to extract column definitions.

    Each "OBJECT = COLUMN" ... "END_OBJECT" group becomes one dictionary.
    Lines are split at the first '=' and the keyword is compared exactly, so
    any amount of padding around '=' is accepted and look-alike keywords
    (e.g. ITEM_BYTES) or words inside description text are not mistaken for
    column keywords. Objects nested inside a column (e.g. BIT_COLUMN) are
    skipped so their keywords do not overwrite the column's own values.

    Parameters:
        fmt_content (str): Content of the LOLARDR.FMT file as a string.

    Returns:
        list: List of dictionaries with column definitions. Each dictionary
        holds whichever of these keywords the column declares:
        'COLUMN_NUMBER', 'BYTES', 'START_BYTE' (int), 'NAME', 'UNIT'
        (str, surrounding quotes removed), 'DATA_TYPE' (str) and
        'MISSING_CONSTANT' (int, or float/str if it is not an integer).

    Raises:
        ValueError: If COLUMN_NUMBER, BYTES or START_BYTE is not an integer.
    """
    int_keywords = ('COLUMN_NUMBER', 'BYTES', 'START_BYTE')
    quoted_keywords = ('NAME', 'UNIT')

    columns = []
    column = None           # dict for the COLUMN being read, None outside one
    nested_depth = 0        # objects currently open inside that COLUMN
    in_quoted_text = False  # inside a quoted value that spans several lines

    for line in fmt_content.splitlines():
        if in_quoted_text:
            # Continuation of a multi-line quoted value (typically a
            # DESCRIPTION); an odd number of quotes closes it.
            if line.count('"') % 2 == 1:
                in_quoted_text = False
            continue

        keyword, has_value, value = line.partition('=')
        keyword = keyword.strip()
        value = value.strip()
        if value.count('"') % 2 == 1:
            # The value opens a quote it does not close on this line.
            in_quoted_text = True

        if keyword == 'OBJECT':
            if column is not None:
                nested_depth += 1
            elif value == 'COLUMN':
                # Fresh dict per column so entries never alias each other.
                column = {}
                nested_depth = 0
        elif keyword == 'END_OBJECT':
            if column is None:
                continue
            if nested_depth:
                nested_depth -= 1
            else:
                columns.append(column)
                column = None
        elif column is None or nested_depth or not has_value:
            continue
        elif keyword in int_keywords:
            column[keyword] = int(value)
        elif keyword in quoted_keywords:
            column[keyword] = value.strip('"\'').strip()
        elif keyword == 'DATA_TYPE':
            column[keyword] = value
        elif keyword == 'MISSING_CONSTANT':
            column[keyword] = _parse_pds_number(value)
    return columns

# Example usage: download a LOLA RDR data file and the format description of
# its columns, then load the records into a DataFrame.
# NOTE(review): the data URL is under the RDR volume (lro-l-lola-3-rdr-v1 /
# lrolol_1xxx) while the format URL is under the EDR volume
# (lro-l-lola-2-edr-v1 / lrolol_0xxx) — confirm this .fmt file describes the
# records of this .dat file.
url = 'https://pds-geosciences.wustl.edu/lro/lro-l-lola-3-rdr-v1/lrolol_1xxx/data/lola_rdr/lro_es_03/lolardr_123201617.dat'
fmt_url = 'https://pds-geosciences.wustl.edu/lro/lro-l-lola-2-edr-v1/lrolol_0xxx/label/lolardr.fmt'

# Fetch and parse the .fmt file. The timeout keeps a stalled server from
# hanging the cell indefinitely; HTTP errors are raised rather than parsed.
fmt_response = requests.get(fmt_url, timeout=30)
fmt_response.raise_for_status()
fmt_content = fmt_response.text
columns_info = parse_fmt_file(fmt_content)

# Read the .dat file based on the parsed .fmt information.
# read_dat_file_from_url prints the reason and returns None on failure.
df = read_dat_file_from_url(url, columns_info)
if df is not None:
    print(df.head())


ValueError: File size is not a multiple of the record length. Check the file and format.
