In [None]:
# Web scraping

import requests

# Log in to the materials database and save the HTML of every record page
# (ids start_id..end_id) to a local text file, one file per page.

# Login credentials
# NOTE(review): credentials are hard-coded in the script; consider reading
# them from the environment if they are ever replaced by a personal account.
username = "Guest"
password = "FG_WM"

# Login URL
login_url = "https://www.wm.tu-darmstadt.de/mat-db/signin.php"

# Seconds to wait for the server on each request. Without a timeout a single
# stalled connection would block the whole crawl indefinitely.
request_timeout = 30

# Session object to persist cookies across requests
session = requests.Session()

# Login payload
login_payload = {
    "login": username,
    "password": password,
}

# Send POST request to login
login_response = session.post(login_url, data=login_payload, timeout=request_timeout)

# Check if login was successful
# NOTE(review): a 200 status only shows that the sign-in endpoint answered;
# whether the credentials were accepted is not verified here — confirm
# against the page content if login failures must be detected reliably.
if login_response.status_code == 200:
    print("Login successful!")

    # URLs of the pages
    base_url = "https://www.wm.tu-darmstadt.de/mat-db/view.php?id="
    start_id = 1
    end_id = 918

    # Fetch HTML content for each page and save it as text
    for page_id in range(start_id, end_id + 1):
        url = base_url + str(page_id)
        try:
            # Use the session object so the login cookies are sent along
            response = session.get(url, timeout=request_timeout)
        except requests.RequestException as exc:
            # One unreachable page must not abort the remaining downloads
            print(f"Failed to fetch page {page_id}: {exc}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch page {page_id}")
            continue

        html_content = response.text

        # Save HTML content to a text file
        output_file = f'output_page_{page_id}.txt'
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(html_content)

        print(f"Page {page_id} content saved to {output_file}")

    # Further requests can be made using the session object
    # For example:
    # response = session.get("https://www.wm.tu-darmstadt.de/mat-db/your_desired_page.php")
else:
    print("Login failed.")

In [None]:
## Organize and extract the database

from bs4 import BeautifulSoup
import csv

# Parse the saved HTML pages and collect, per page, the mechanical
# properties, the chemical composition and the heat treatment text, then
# write everything to one CSV file with one row per page.

# Initialize variables to store extracted data
all_data = []

# Property symbols looked up in the first cell of each mechanical-properties row
property_names = ["E", "Rp0.2", "Rm", "A5", "Z", "ν", "K", "n", "σf", "εf", "R′p0.2", "σE", "εE", "NE", "NT", "Tσ", "Tεp", "K′", "n′", "σ′f", "ε′f", "b", "c"]
# Element symbols; composition cells are assigned to these by position
elements = ['C', 'Mn', 'P', 'S', 'N', 'Cu', 'Fe', 'Al', 'Ni', 'Mo', 'Si', 'Cr']

# Inclusive range of saved pages to parse; files are named 'output_page_<n>.txt'.
# Only pages 1 and 2 are processed; raise last_page to cover more saved pages.
first_page = 1
last_page = 2

# Iterate over all the files
for page_number in range(first_page, last_page + 1):
    filename = f'output_page_{page_number}.txt'
    try:
        # Read explicitly as UTF-8: the property symbols matched below contain
        # non-ASCII characters, so the platform default encoding must not be
        # relied on. Assumes the pages were saved as UTF-8.
        with open(filename, 'r', encoding='utf-8') as file:
            html_content = file.read()
    except FileNotFoundError:
        # A page may never have been saved; skip it instead of aborting the run
        print(f"Skipping {filename}: file not found")
        continue

    # Create a BeautifulSoup object
    soup = BeautifulSoup(html_content, 'html.parser')

    # Initialize variables to store extracted data for this file
    properties = {}
    chemical_composition = {}
    heat_treatment_value = None

    # Find the mechanical properties table (first table with width="100%")
    properties_table = soup.find('table', width='100%')
    if properties_table:
        for row in properties_table.find_all('tr'):
            cells = row.find_all('td')
            # Only four-cell rows are considered; the symbol is read from the
            # first cell and the value from the third
            if len(cells) == 4:
                property_name = cells[0].text.strip()
                if property_name in property_names:
                    properties[property_name] = cells[2].text.strip()

    # Find the chemical composition table (first table with width="550")
    composition_table = soup.find('table', width='550')
    if composition_table:
        for row in composition_table.find_all('tr'):
            cells = row.find_all('td')
            # A row with one cell per element; if several rows match, the
            # last one wins
            if len(cells) == len(elements):
                for element, cell in zip(elements, cells):
                    chemical_composition[element] = cell.text.strip()

    # Find Heat treatment value: the cell following the first labelled cell
    # that has a sibling cell
    for td in soup.find_all('td'):
        if 'Heat treatment:' in td.text:
            next_sibling_td = td.find_next_sibling('td')
            if next_sibling_td is not None:
                heat_treatment_value = next_sibling_td.text.strip()
                break

    # Store the extracted data for this file
    all_data.append({
        'File': filename,
        'Properties': properties,
        'Chemical Composition': chemical_composition,
        'Heat Treatment': heat_treatment_value
    })

# Write the extracted data to a CSV file. The header contains non-ASCII
# symbols, so the encoding is fixed to UTF-8 rather than the locale default.
with open('chemical_and_mechanical_properties.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

    # Write header for chemical composition, mechanical properties, and heat treatment
    header_row = ['File'] + elements + property_names + ['Heat Treatment']
    writer.writerow(header_row)

    # Write data for each file; values that were not found become empty cells
    for data in all_data:
        row = [data['File']]
        row += [data['Chemical Composition'].get(element, '') for element in elements]
        row += [data['Properties'].get(prop, '') for prop in property_names]
        row.append(data['Heat Treatment'])
        writer.writerow(row)