In [1]:
import pandas as pd
import re

In [2]:
def atom_from(var):
    """
        Renders a Python value as the text of a Prolog term.

        * real booleans and the strings True/true/Yes/yes and
          False/false/No/no become the atoms true / false;
        * lists, sets and tuples become a Prolog list whose items are
          rendered recursively;
        * anything that parses as a number becomes that number (whole
          values are written without a decimal point);
        * everything else becomes a double-quoted string, with backslashes
          and double quotes backslash-escaped.
    """
    # Booleans are recognised by type, not by equality: in Python
    # 1 == True and 0 == False, so an equality test would turn the counts
    # 1 and 0 into true / false. is_bool also accepts numpy booleans.
    if pd.api.types.is_bool(var):
        return "true" if var else "false"

    if isinstance(var, str):
        if var in ("True", "true", "Yes", "yes"):
            return "true"
        if var in ("False", "false", "No", "no"):
            return "false"

    if type(var) in [list, set, tuple]:
        return f"[{','.join([atom_from(x) for x in var])}]"

    try:
        whole = int(var)
        # int() silently truncates 2.5 to 2; only accept the integer form
        # when it is the same value. Text never truncates (it raises).
        if isinstance(var, (str, bytes)) or whole == var:
            return str(whole)
    except (TypeError, ValueError, OverflowError):
        # Not an integer (this includes NaN and infinity); try float next.
        pass

    try:
        return str(float(var))
    except (TypeError, ValueError, OverflowError):
        pass

    # Escape the two characters that would otherwise end or corrupt the
    # quoted string.
    escaped = str(var).replace('\\', '\\\\').replace('"', '\\"')
    return f'"{escaped}"'

In [3]:
def fact_from(series, factname, columns=None):
    """
        Builds one Prolog fact of the form `factname(arg1,arg2,...).`

        The arguments are the values of `series` at each label in `columns`
        (every index label of `series` when `columns` is None), each
        rendered with atom_from and joined with commas.
    """
    labels = list(series.index.array) if columns is None else columns
    arguments = ','.join([atom_from(series[label]) for label in labels])
    return f"{factname}({arguments})."

In [4]:
def dataframe_to_prolog(df, factname, columns=None):
    """
        Turns every row of a Pandas dataframe into a Prolog fact string
        (built by fact_from) and returns the facts as a list.

        `columns` selects which columns become fact arguments; when it is
        None every column of `df` is used.
    """
    selected = df.columns if columns is None else columns

    def row_to_fact(row):
        # One fact per row, always for the same name and column choice.
        return fact_from(row, factname, selected)

    return df.apply(row_to_fact, axis=1).values.tolist()

In [5]:
def prolog_file_from(df, factname, filename=None, columns=None):
    """
        Writes a Pandas dataframe to a text file of Prolog facts, one fact
        per line.

        The facts come from dataframe_to_prolog(df, factname, columns).
        When `filename` is None the file is named `<factname>.pl`. An
        existing file of that name is overwritten.
    """
    target = f"{factname}.pl" if filename is None else filename

    # Build every fact before the file is opened for writing.
    facts = dataframe_to_prolog(df, factname, columns)

    with open(target, 'w') as out:
        out.writelines(fact+'\n' for fact in facts)

# One final clean-up, and write files of Prolog facts

### CPUs

In [6]:
# Load the raw CPU table from cpu.csv (path is relative to the working directory).
cpus = pd.read_csv('cpu.csv')

In [7]:
# Keep only the columns that become arguments of the cpu fact, in this
# order, then discard rows that have no model name.
cpus = cpus[[
    'Manufacturer',
    'Model',
    'Data Width',
    'Cores',
    'Socket',
    'Operating Frequency',
    'Turbo Frequency',
    "Simultaneous Multithreading",
    'Integrated Graphics',
    'Includes CPU Cooler',
    "L1 Cache",
    "L2 Cache",
    "L3 Cache",
    "Lithography",
    "Thermal Design Power",
    "Part Number",
]]
cpus = cpus.loc[cpus['Model'].notnull()]

In [8]:
# Write one cpu(...) fact per row; with no filename given the output is cpu.pl.
prolog_file_from(cpus, 'cpu')

### Cases

In [9]:
# Load the raw case table from case.csv (path is relative to the working directory).
cases = pd.read_csv('case.csv')

In [10]:
# Keep only the columns that become arguments of the case fact; their order
# here is the argument order of the fact.
cases = cases[[
    'Color', 'Type', 'Motherboard Compatibility', 
    'Manufacturer', 'Includes Power Supply',
    'External 3.5" Bays', 'External 5.25" Bays',
    'External 5.25" Slim Bays',
    'External 5.25" Slim Slot Load Bays',
    'Internal 2.5" Bays',
    'Internal 3.5" Bays',
    'Internal 5.25" Bays',
    'Front Panel USB 3.0 Ports', 
    'Part Number'
]]

In [11]:
# Replace missing bay counts with 0 for every bay column in one call.
# fillna with a dict fills each named column with its own value and returns
# a new frame, so nothing is assigned into a column-subset of another frame.
cases = cases.fillna({
    'External 3.5" Bays': 0,
    'External 5.25" Bays': 0,
    'External 5.25" Slim Bays': 0,
    'External 5.25" Slim Slot Load Bays': 0,
    'Internal 2.5" Bays': 0,
    'Internal 3.5" Bays': 0,
    'Internal 5.25" Bays': 0,
})

In [12]:
# Turn the comma-separated compatibility text into a list of trimmed names.
cases['Motherboard Compatibility'] = cases['Motherboard Compatibility'].apply(
    lambda text: list(map(str.strip, text.split(',')))
)

# Split multi-colour entries on ' / '. The value is passed through str()
# first, so a non-text entry is split as its string form.
cases['Color'] = cases['Color'].apply(
    lambda colour: str(colour).split(' / ')
)

In [13]:
# Write one case(...) fact per row; with no filename given the output is case.pl.
prolog_file_from(cases, 'case')

### CPU Coolers

In [14]:
# Load the raw CPU cooler table from cpu-cooler.csv (path is relative to the working directory).
cpu_coolers = pd.read_csv('cpu-cooler.csv')

In [15]:
# Fill the gaps by assigning the filled column back to the frame.
# Calling fillna(..., inplace=True) on a selected column is a chained
# in-place update, which does not reach the frame under pandas Copy-on-Write.
cpu_coolers['Bearing Type'] = cpu_coolers['Bearing Type'].fillna("Unspecified")
cpu_coolers['Color'] = cpu_coolers['Color'].fillna("Unspecified")
# A cooler without a model name falls back to its part number.
cpu_coolers['Model'] = cpu_coolers['Model'].fillna(cpu_coolers['Part Number'])
cpu_coolers['Radiator Size'] = cpu_coolers['Radiator Size'].fillna("N/A")

In [16]:
# Turn the comma-separated socket text into a list of trimmed socket names.
cpu_coolers['Supported Sockets'] = cpu_coolers['Supported Sockets'].apply(
    lambda text: list(map(str.strip, text.split(',')))
)

In [17]:
# Keep only coolers that state both a fan speed and a noise level; the
# min/max columns derived from them below cannot be built without both.
cpu_coolers = cpu_coolers[
    cpu_coolers['Fan RPM'].notna() & cpu_coolers['Noise Level'].notna()
]

In [18]:
def dbsplit(dbs: str):
    """
        Extracts the noise levels from text such as "20 - 35 dB".

        The text is split on '-' and the first run of digits in each part
        is converted to int (so only the digits before a decimal point are
        used). A single value is returned twice, as [value, value].
        Raises AttributeError if a part contains no digit.
    """
    levels = []
    for part in dbs.split('-'):
        first_number = re.search(r"\d+", part)
        levels.append(int(first_number.group(0)))

    # One value stands for both ends of the range.
    return levels * 2 if len(levels) == 1 else levels

In [19]:
# dbsplit always yields at least two entries: item 0 is stored as the
# minimum noise level and item 1 as the maximum.
cpu_coolers["Min Noise Level"] = cpu_coolers['Noise Level'].apply(
    lambda level: dbsplit(level)[0])
cpu_coolers["Max Noise Level"] = cpu_coolers['Noise Level'].apply(
    lambda level: dbsplit(level)[1])

In [20]:
def rpmsplit(rpms: str):
    """
        Picks the all-digit words out of a space-separated fan speed text
        such as "600 - 1500 RPM" and returns them as a list of strings.

        A single speed is returned twice, as [speed, speed]. Text without
        any all-digit word yields an empty list.
    """
    speeds = list(filter(str.isdigit, rpms.split(' ')))

    # One value stands for both ends of the range.
    if len(speeds) == 1:
        speeds = speeds * 2

    return speeds

In [21]:
# Item 0 of rpmsplit's result is stored as the minimum fan speed and item 1
# as the maximum; both stay strings at this point.
cpu_coolers['Min Fan RPM'] = cpu_coolers['Fan RPM'].apply(
    lambda speed: rpmsplit(speed)[0]
)
cpu_coolers['Max Fan RPM'] = cpu_coolers['Fan RPM'].apply(
    lambda speed: rpmsplit(speed)[1]
)

In [22]:
# Keep only the columns that become arguments of the cpu_cooler fact, in
# this order. The raw 'Fan RPM' and 'Noise Level' columns are dropped in
# favour of the min/max columns derived from them.
cpu_coolers = cpu_coolers[[
    'Manufacturer', 'Model', 'Supported Sockets',
    'Bearing Type',
    'Min Fan RPM', 'Max Fan RPM',
    'Min Noise Level', 'Max Noise Level',
    'Liquid Cooled', 'Radiator Size', 'Part Number'
]]

In [23]:
# Write one cpu_cooler(...) fact per row; with no filename given the output is cpu_cooler.pl.
prolog_file_from(cpu_coolers, 'cpu_cooler')

### Internal Hard Drives

In [24]:
# Load the raw drive table from internal-hard-drive.csv (path is relative to the working directory).
hard_drives = pd.read_csv('internal-hard-drive.csv')

In [25]:
# Keep only the columns that become arguments of the internal_hard_drive
# fact; their order here is the argument order of the fact.
hard_drives = hard_drives[[
    'Manufacturer', 'Capacity', 'Interface', 'Form Factor',
    'RPM', 'NAND Flash Type', 'Cache', 'Hybrid SSD Cache', 
    'GB/$1.00', 'Price/GB', 
    'Part Number'
]]

In [26]:
def tb_to_gb(tb):
    """
        Converts a capacity text such as "2 TB" or "500 GB" to a number.

        * "<n> TB" is multiplied by 1024 and truncated to an int;
        * "<n> <other unit>" returns n unscaled, as an int when it is a
          whole number and as a float otherwise;
        * text that is not exactly two space-separated words is read as a
          bare integer taken from its first word.

        If the amount in front of a non-TB unit is not numeric, the amount
        text is returned unchanged.
    """
    parts = tb.split(' ')

    if len(parts) != 2:
        return int(parts[0])

    amount, unit = parts
    if unit == 'TB':
        return int(float(amount) * 1024)

    # This branch used to hand back the raw text, which left a mix of str
    # and int values in the same column; return a number like the TB branch.
    try:
        value = float(amount)
    except ValueError:
        return amount
    return int(value) if value.is_integer() else value

In [27]:
# Replace the capacity text with the value computed by tb_to_gb.
# Re-running this cell on the converted column fails, because tb_to_gb
# calls .split on its argument.
hard_drives['Capacity'] = hard_drives['Capacity'].apply(tb_to_gb)

In [28]:
def fix_inches(x):
    """
        Replaces every double-quote character in `x` with the text " in",
        so that an inch mark cannot end a quoted string early
        (for example 3.5" becomes 3.5 in).
    """
    inch_mark = re.compile(r"\"")
    return inch_mark.sub(" in", x)

In [29]:
# Replace the inch mark (") in each form factor with " in", so the value
# contains no double quote when atom_from wraps it in double quotes.
hard_drives['Form Factor'] = hard_drives['Form Factor'].apply(fix_inches)

In [30]:
# Write one internal_hard_drive(...) fact per row; with no filename given
# the output is internal_hard_drive.pl.
prolog_file_from(hard_drives, 'internal_hard_drive')

### Memory

In [31]:
# Load the raw memory table from memory.csv (path is relative to the working directory).
memory = pd.read_csv('memory.csv')

In [32]:
def memory_speed_split(mem):
    """
        Splits a speed text such as "DDR4-3200" on '-' and returns its
        first two parts as a (ddr, speed) tuple of strings.

        Raises IndexError when the text contains no '-'.
    """
    pieces = mem.split('-')
    return pieces[0], pieces[1]

In [33]:
# Store the part of 'Speed' before the first '-' as the new 'DDR' column.
# This must run before 'Speed' itself is overwritten with the second part.
memory['DDR'] = memory['Speed'].apply(lambda m: memory_speed_split(m)[0])

In [34]:
# Overwrite 'Speed' with the part after the first '-'.
# Not idempotent: once the '-' is gone, memory_speed_split raises
# IndexError on the new values, so this cell cannot be run twice.
memory['Speed'] = memory['Speed'].apply(lambda m: memory_speed_split(m)[1])

In [35]:
# Take the number between the first '(' and the following 'x' as the module
# count -- presumably 'Size' looks like "16 GB (2 x 8 GB)"; TODO confirm.
# Raises IndexError for a value without '('.
memory['Number of Dimms'] = memory['Size'].apply(
    lambda m: int(str(m).split('(')[1].split('x')[0].strip()))

In [36]:
# Replace 'Size' with its first all-digit word, kept as a string.
# This must run after 'Number of Dimms' has been read from the full text.
memory['Size'] = memory['Size'].apply(
    lambda size: [word for word in size.split(' ') if word.isdigit()][0]
)

In [37]:
# Keep only the columns that become arguments of the memory fact; their
# order here is the argument order of the fact.
memory = memory[[
    'Manufacturer', 'DDR', 'Speed', 'Size', 'Number of Dimms',
    'CAS Latency', 'ECC', 'Heat Spreader', 'Registered',
    'Timing', 'Voltage', 'Type', 'Price/GB', 'Part Number'
]]

In [38]:
# Write one memory(...) fact per row; with no filename given the output is memory.pl.
prolog_file_from(memory, 'memory')

### Motherboards

In [39]:
# Load the raw motherboard table from motherboard.csv (path is relative to the working directory).
motherboards = pd.read_csv('motherboard.csv')

In [40]:
# Replace every missing value in the table, whatever its column, with 0.
motherboards = motherboards.fillna(0)

In [41]:
# Keep the text before the first space as an int -- presumably the values
# look like "64 GB"; TODO confirm. A 0 written by the earlier fillna(0)
# stays 0.
motherboards['Maximum Supported Memory'] = motherboards['Maximum Supported Memory'].apply(
    lambda m: int(str(m).split(' ')[0]))

In [42]:
# Remove the 'Model' column from the table.
motherboards = motherboards.drop(columns='Model')

In [43]:
# NOTE(review): only 'Manufacturer' is kept here, so each motherboard fact
# written afterwards has a single argument and the 'Maximum Supported
# Memory' clean-up never reaches the output. This looks unfinished
# compared with the other parts -- TODO confirm the intended column list.
motherboards = motherboards[[
    'Manufacturer', 
]]

In [44]:
# Display the remaining column labels as a check before the export.
motherboards.columns

Index(['Manufacturer'], dtype='object')

In [45]:
# Write one motherboard(...) fact per row; with no filename given the output is motherboard.pl.
prolog_file_from(motherboards, 'motherboard')

### Power Supplies

In [46]:
# Load the raw power supply table from power-supply.csv (path is relative to the working directory).
psus = pd.read_csv('power-supply.csv')

In [47]:
# Keep only the columns that become arguments of the power_supply fact;
# their order here is the argument order of the fact.
psus = psus[[
    'Wattage', 'Efficiency', 'Efficiency Certification',
    'Modular', 'Manufacturer', 'Fanless', 'Output',
    'Type', 'Color', 'PCI-Express 6+2-Pin Connectors',
    'PCI-Express 6-Pin Connectors', 'PCI-Express 8-Pin Connectors',
    'Part Number'
]]

In [48]:
# Missing outputs become the text "Not Specified"; every other value is
# split on ',' into a list of trimmed entries.
# The filled column is used directly instead of fillna(..., inplace=True)
# on a selected column, which is a chained in-place update and does not
# reach the frame under pandas Copy-on-Write.
psus['Output'] = psus['Output'].fillna("").apply(
    lambda x: "Not Specified" if x=='' else [s.strip() for s in str(x).split(',')]
)

In [49]:
# Fill every gap with one per-column fillna that returns a new frame.
# Calling fillna(..., inplace=True) on a selected column is a chained
# in-place update, which does not reach the frame under pandas Copy-on-Write.
psus = psus.fillna({
    'Fanless': 'False',
    'Color': 'N/A',
    'Efficiency': 'Not Rated',
    'Efficiency Certification': 'N/A',
    'PCI-Express 6+2-Pin Connectors': 0,
    'PCI-Express 6-Pin Connectors': 0,
    'PCI-Express 8-Pin Connectors': 0,
})
# Keep the text before the first space as an int -- presumably the values
# look like "650 W"; TODO confirm.
psus['Wattage'] = psus['Wattage'].apply(
    lambda w: int(str(w).split(' ')[0]))

In [50]:
# Convert each connector count to a plain int, value by value.
psus['PCI-Express 6+2-Pin Connectors'] = psus['PCI-Express 6+2-Pin Connectors'].apply(
    lambda count: int(count)
)
psus['PCI-Express 6-Pin Connectors'] = psus['PCI-Express 6-Pin Connectors'].apply(
    lambda count: int(count)
)
psus['PCI-Express 8-Pin Connectors'] = psus['PCI-Express 8-Pin Connectors'].apply(
    lambda count: int(count)
)

In [51]:
# Write one power_supply(...) fact per row; with no filename given the output is power_supply.pl.
prolog_file_from(psus, 'power_supply')

### Video Cards

In [52]:
# Load the raw video card table from video-card.csv (path is relative to the working directory).
gpus = pd.read_csv('video-card.csv')

In [53]:
# Fill every gap with one per-column fillna that returns a new frame.
# Calling fillna(..., inplace=True) on a selected column is a chained
# in-place update, which does not reach the frame under pandas Copy-on-Write.
gpus = gpus.fillna({
    # Free-text attributes.
    'Core Clock': 'Not Specified',
    'Boost Clock': 'Not Specified',
    'Color': 'Not Specified',
    'Length': 'Not Specified',
    # Feature flags.
    'Supports Freesync': False,
    'Supports G-Sync': False,
    'VirtualLink': False,
    # Port counts.
    'VGA': 0,
    'VHDCI': 0,
    'DVI-D Dual-Link': 0,
    'DVI-D Single-Link': 0,
    'DVI-I Dual-Link': 0,
    'S-Video': 0,
    'HDMI': 0,
    'Mini-HDMI': 0,
    'DisplayPort': 0,
    'Mini-Display Port': 0,
})

In [54]:
# Replace the inch mark (") in each length with " in", so the value
# contains no double quote when atom_from wraps it in double quotes.
gpus.Length = gpus.Length.apply(fix_inches)

In [56]:
# Keep only the columns that become arguments of the video_card fact; their
# order here is the argument order of the fact.
gpus = gpus[[
    'Chipset', 'Manufacturer', 'Memory Size', 'Memory Type',  
    'Core Clock', 'Boost Clock', 'Interface', 'Color', 
    'Length', 'Fan', 'TDP',     
    'Supports Freesync', 'Supports G-Sync', 
    'CrossFire Support', 'SLI Support',  
    'VGA', 'VHDCI',
    'DVI-D Dual-Link', 'DVI-D Single-Link', 'DVI-I Dual-Link',
    'S-Video', 'HDMI', 'Mini-HDMI',
    'DisplayPort', 'Mini-Display Port',
    'VirtualLink',
    'Part Number',
]]

In [57]:
# Write one video_card(...) fact per row; with no filename given the output is video_card.pl.
prolog_file_from(gpus, 'video_card')