In [1]:
import chess.pgn
import pandas as pd

# Función para leer el PGN y crear el DataFrame
def process_pgn(pgn_file):
    pgn = open(pgn_file)
    
    # Lista para almacenar los resultados de las partidas
    data = []

    # Leer todas las partidas del archivo PGN
    while True:
        try:
            game = chess.pgn.read_game(pgn)
            if game is None:
                break

            
            event = game.headers.get("Event", "?")
            white_player = game.headers.get("White", "?")
            black_player = game.headers.get("Black", "?")
            result = game.headers.get("Result", "?")
            utc_date = game.headers.get("UTCDate", "?")
            utc_time = game.headers.get("UTCTime", "?")
            white_elo = game.headers.get("WhiteElo", "?")
            black_elo = game.headers.get("BlackElo", "?")
            white_rating_diff = game.headers.get("WhiteRatingDiff", "?")
            black_rating_diff = game.headers.get("BlackRatingDiff", "?")
            eco = game.headers.get("ECO", "?")
            opening = game.headers.get("Opening", "?")
            time_control = game.headers.get("TimeControl", "?")
            termination = game.headers.get("Termination", "?")

            data.append([event, white_player, black_player, result, utc_date, utc_time,
                         white_elo, black_elo, white_rating_diff, black_rating_diff, eco, 
                         opening, time_control, termination])

        except Exception as e:
            print(f"Error al leer una partida: {e}")
            break
    
    # Dataframe con los datos creados
    df = pd.DataFrame(data, columns=[
        "Event", "White", "Black", "Result", "UTCDate", "UTCTime", "WhiteElo", "BlackElo",
        "WhiteRatingDiff", "BlackRatingDiff", "ECO", "Opening", "TimeControl", "Termination"
    ])
    
    return df

df_result = process_pgn("predict2.pgn")

df_result.to_csv("evaluaciones_partida_input.csv", index=False)

df_result


Unnamed: 0,Event,White,Black,Result,UTCDate,UTCTime,WhiteElo,BlackElo,WhiteRatingDiff,BlackRatingDiff,ECO,Opening,TimeControl,Termination
0,PGN Import,Eduardo,Clase,1-0,?,?,200,1200,?,?,?,?,40/9000:40/9000:40/9000,normal


In [2]:
# Archivo PGN desde la línea 18 hasta el final
def extraer_movimientos_desde_pgn(pgn_path):
    with open(pgn_path, 'r') as file:
        lines = file.readlines()
        # El texto desde la línea 18
        movimientos = "".join(lines[16:]).strip()
    return movimientos

pgn_file_path = 'predict2.pgn'

movimientos_extraidos = extraer_movimientos_desde_pgn(pgn_file_path)

df_result['AN'] = [movimientos_extraidos for _ in range(len(df_result))]

df_result


Unnamed: 0,Event,White,Black,Result,UTCDate,UTCTime,WhiteElo,BlackElo,WhiteRatingDiff,BlackRatingDiff,ECO,Opening,TimeControl,Termination,AN
0,PGN Import,Eduardo,Clase,1-0,?,?,200,1200,?,?,?,?,40/9000:40/9000:40/9000,normal,1. e4 {0.00/1 1} b5 {(Bf1xb5 Bc8-b7) -1.79/1 1...


In [3]:
df_result

Unnamed: 0,Event,White,Black,Result,UTCDate,UTCTime,WhiteElo,BlackElo,WhiteRatingDiff,BlackRatingDiff,ECO,Opening,TimeControl,Termination,AN
0,PGN Import,Eduardo,Clase,1-0,?,?,200,1200,?,?,?,?,40/9000:40/9000:40/9000,normal,1. e4 {0.00/1 1} b5 {(Bf1xb5 Bc8-b7) -1.79/1 1...


In [4]:
import re

log_file_path = '../Analisis3.log'
with open(log_file_path, 'r') as file:
    log_content = file.read()

eval_values = re.findall(r'[\+\-]?\d+[,\.]\d{2}', log_content)

eval_values = [float(value.replace(',', '.')) for value in eval_values]

print(eval_values)


[0.34, -0.24, 1.92, -1.73, 1.83, -1.52, 2.37, -1.9, 2.23, -2.69, 3.82, -3.18, 4.19, -4.57, 4.91, -3.98, 5.68, -3.36, 5.36, -5.75, 6.8, -6.7, 6.9, -6.9, 7.29, -7.08]


In [5]:
df_evals = pd.DataFrame({'eval': [eval_values]})

df_evals.head()


Unnamed: 0,eval
0,"[0.34, -0.24, 1.92, -1.73, 1.83, -1.52, 2.37, ..."


In [6]:
df_result['evals'] = df_evals['eval'].values

df_result


Unnamed: 0,Event,White,Black,Result,UTCDate,UTCTime,WhiteElo,BlackElo,WhiteRatingDiff,BlackRatingDiff,ECO,Opening,TimeControl,Termination,AN,evals
0,PGN Import,Eduardo,Clase,1-0,?,?,200,1200,?,?,?,?,40/9000:40/9000:40/9000,normal,1. e4 {0.00/1 1} b5 {(Bf1xb5 Bc8-b7) -1.79/1 1...,"[0.34, -0.24, 1.92, -1.73, 1.83, -1.52, 2.37, ..."


In [7]:
df_result = df_result.drop(['Event','UTCDate', 'WhiteRatingDiff', 'UTCTime','BlackRatingDiff','Opening'], axis=1)

In [8]:
df_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   White        1 non-null      object
 1   Black        1 non-null      object
 2   Result       1 non-null      object
 3   WhiteElo     1 non-null      object
 4   BlackElo     1 non-null      object
 5   ECO          1 non-null      object
 6   TimeControl  1 non-null      object
 7   Termination  1 non-null      object
 8   AN           1 non-null      object
 9   evals        1 non-null      object
dtypes: object(10)
memory usage: 212.0+ bytes


In [9]:
# Como hay mucha fragmentación, y como sé que la letra (A, B, C, D...) representa a un tipo concreto
# de apertura creo una columna nueva solo con el valor del tipo

df_result['ECO_Family'] = df_result['ECO'].str[0]
print(df_result['ECO_Family'].value_counts())
df_result.drop(['ECO'],axis=1)

ECO_Family
?    1
Name: count, dtype: int64


Unnamed: 0,White,Black,Result,WhiteElo,BlackElo,TimeControl,Termination,AN,evals,ECO_Family
0,Eduardo,Clase,1-0,200,1200,40/9000:40/9000:40/9000,normal,1. e4 {0.00/1 1} b5 {(Bf1xb5 Bc8-b7) -1.79/1 1...,"[0.34, -0.24, 1.92, -1.73, 1.83, -1.52, 2.37, ...",?


In [10]:
df_result['Result'] = df_result['Result'].map({
    '1-0': 1,
    '1/2-1/2': 0,
    '0-1': -1})


In [11]:
df_result.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   White        1 non-null      object
 1   Black        1 non-null      object
 2   Result       1 non-null      int64 
 3   WhiteElo     1 non-null      object
 4   BlackElo     1 non-null      object
 5   ECO          1 non-null      object
 6   TimeControl  1 non-null      object
 7   Termination  1 non-null      object
 8   AN           1 non-null      object
 9   evals        1 non-null      object
 10  ECO_Family   1 non-null      object
dtypes: int64(1), object(10)
memory usage: 220.0+ bytes


In [12]:
df_result.to_csv("../data/raw/df_chess_inputs.csv", index=False)