In [None]:
# Stage 1

import pandas as pd

# -------------------- STAGE 1: File Paths and Normalization Process --------------------
# Locations of the two input workbooks that are compared in the later stages.
xrd_bizegelen_path, xrd_sertifika_path = (
    r"C:/Users/kubra/Desktop/proje/XRD_field.xlsx",
    r"C:/Users/kubra/Desktop/proje/XRD_lab.xlsx",
)

# Read each workbook into its own DataFrame (read_excel defaults are used).
df1 = pd.read_excel(io=xrd_bizegelen_path)
df2 = pd.read_excel(io=xrd_sertifika_path)

def clean_sonuclar_keep_order(text):
    """Normalize a comma-separated mineral list while preserving its order.

    Each comma-separated entry is stripped of surrounding whitespace and passed
    through ``str.capitalize`` (first character upper-cased, the rest
    lower-cased). Entries that are empty after stripping are dropped and the
    remaining ones are re-joined with ``", "``.

    Parameters
    ----------
    text : object
        Raw cell value. Missing values (``None`` / ``NaN``) yield ``""``.
        Non-string scalars (for example a numeric Excel cell) are converted
        with ``str()`` instead of raising ``AttributeError`` on ``.split``.

    Returns
    -------
    str
        The normalized list, or ``""`` when there is nothing to list.
    """
    if pd.isna(text):
        return ""
    # str() keeps string input unchanged and makes numeric cells splittable.
    parts = (part.strip() for part in str(text).split(","))
    return ", ".join(part.capitalize() for part in parts if part)

# Normalization process (PRESERVING THE ORDER)
df1["sonuclar"] = df1["sonuclar"].apply(clean_sonuclar_keep_order)
df2["sonuclar"] = df2["sonuclar"].apply(clean_sonuclar_keep_order)

# List the mineral names used (for information purposes).
# Rows without any mineral are normalized to "", and "".split(", ") yields [""],
# so empty names are filtered out instead of being listed as a blank entry.
# A set comprehension also avoids the quadratic list concatenation of sum(..., []).
all_minerals_df1 = {
    name
    for row in df1["sonuclar"]
    if pd.notna(row)
    for name in row.split(", ")
    if name
}
all_minerals_df2 = {
    name
    for row in df2["sonuclar"]
    if pd.notna(row)
    for name in row.split(", ")
    if name
}
unique_minerals = sorted(all_minerals_df1 | all_minerals_df2)

print("All mineral names used:")
for mineral in unique_minerals:
    print("-", mineral)

# Save the normalized files (read back by the following stages).
df1.to_excel("BizeGelen_Normalize.xlsx", index=False)
df2.to_excel("Mat_Normalize.xlsx", index=False)


In [None]:
import pandas as pd

# -------------------- STAGE 2: DIFFERENCE REPORT --------------------
# Load the normalized files
df1 = pd.read_excel("BizeGelen_Normalize.xlsx")
df2 = pd.read_excel("Mat_Normalize.xlsx")

# Sample ID sets
numune_set1 = set(df1["numune isareti"])
numune_set2 = set(df2["numune isareti"])

# Samples with common Sample IDs
common_samples = numune_set1.intersection(numune_set2)

# CONDITION 1: samples that exist only in df1
only_in_df1 = df1[df1["numune isareti"].isin(numune_set1 - numune_set2)]

# CONDITION 2: samples that exist only in df2
only_in_df2 = df2[df2["numune isareti"].isin(numune_set2 - numune_set1)]

# CONDITIONS 3 and 4: samples that are common but have differing results
df1_common = df1[df1["numune isareti"].isin(common_samples)].copy()
df2_common = df2[df2["numune isareti"].isin(common_samples)].copy()

merged = pd.merge(df1_common, df2_common, on="numune isareti", suffixes=('_bizegelen', '_sertifika'))

# Empty result cells can come back from Excel as NaN, and NaN != NaN evaluates
# to True. Rows where BOTH results are missing are therefore excluded
# explicitly so they are not reported as differences.
both_missing = merged["sonuclar_bizegelen"].isna() & merged["sonuclar_sertifika"].isna()
results_differ = merged["sonuclar_bizegelen"] != merged["sonuclar_sertifika"]
farkli_sonuclar = merged[results_differ & ~both_missing]

# Corresponding parts of differing rows for each file
farkli_df1 = farkli_sonuclar[["numune isareti", "sonuclar_bizegelen"]].rename(columns={"sonuclar_bizegelen": "sonuclar"})
farkli_df2 = farkli_sonuclar[["numune isareti", "sonuclar_sertifika"]].rename(columns={"sonuclar_sertifika": "sonuclar"})

# Combine Conditions 1 and 3 (for BizeGelen)
combined_df1 = pd.concat([only_in_df1, farkli_df1], ignore_index=True)

# Combine Conditions 2 and 4 (for Certificate)
combined_df2 = pd.concat([only_in_df2, farkli_df2], ignore_index=True)

# Excel output: one sheet per source file
output_file = "xrd_fark_raporu1.xlsx"
with pd.ExcelWriter(output_file) as writer:
    combined_df1.to_excel(writer, sheet_name="Sadece_BizeGelen", index=False)
    combined_df2.to_excel(writer, sheet_name="Sadece_Sertifika", index=False)

print(f"Report successfully generated: {output_file}")


In [None]:
# -------------------- STAGE 3a: APPLY CORRECTIONS --------------------
def _apply_corrections(frame, corrections):
    """Return a copy of ``frame`` with ``sonuclar`` overwritten per sample ID.

    For every row of ``corrections``, all rows of the copy whose
    ``numune isareti`` equals that row's ID receive that row's ``sonuclar``.
    When an ID occurs more than once in ``corrections``, the last row wins.
    ``frame`` itself is not modified.
    """
    updated = frame.copy()
    for numune, sonuc in zip(corrections["numune isareti"], corrections["sonuclar"]):
        updated.loc[updated["numune isareti"] == numune, "sonuclar"] = sonuc
    return updated


df1 = pd.read_excel("BizeGelen_Normalize.xlsx")
df2 = pd.read_excel("Mat_Normalize.xlsx")

# The two sheets of the difference report are used as the correction tables.
duzeltmeler_bizegelen = pd.read_excel("xrd_fark_raporu1.xlsx", sheet_name="Sadece_BizeGelen")
duzeltmeler_sertifika = pd.read_excel("xrd_fark_raporu1.xlsx", sheet_name="Sadece_Sertifika")

df1_updated = _apply_corrections(df1, duzeltmeler_bizegelen)
df2_updated = _apply_corrections(df2, duzeltmeler_sertifika)

df1_updated.to_excel("BizeGelen_Guncel1.xlsx", index=False)
df2_updated.to_excel("Mat_Guncel1.xlsx", index=False)

# Single-quoted literal: the original nested double quotes were a SyntaxError.
print('Stage 3: Corrections have been applied and saved to the "Guncel1" files.')

In [None]:
# Stage 3b - If the corrected rows have been added to the original files, highlight them in yellow

from openpyxl import load_workbook
from openpyxl.styles import PatternFill

# Yellow highlight definition
yellow_fill = PatternFill(start_color="FFFF00", end_color="FFFF00", fill_type="solid")


def _highlight_corrected_rows(path, frame, corrections, fill):
    """Fill the worksheet rows of ``path`` whose sample ID appears in ``corrections``.

    The workbook at ``path`` is opened, every row whose ``numune isareti`` in
    ``frame`` is found in ``corrections["numune isareti"]`` gets ``fill`` applied
    to all of its cells, and the workbook is saved back to the same path.
    ``frame`` is assumed to have the same row order as the active sheet (it is
    the DataFrame that was written to ``path``). Returns the saved workbook.
    """
    workbook = load_workbook(path)
    sheet = workbook.active
    # Loop-invariant: look the corrected IDs up once instead of once per row.
    corrected_ids = corrections["numune isareti"].values
    for position, numune in enumerate(frame["numune isareti"]):
        if numune in corrected_ids:
            # +2 because worksheet row 1 is the header and openpyxl rows start at 1.
            for cell in sheet[position + 2]:
                cell.fill = fill
    workbook.save(path)
    return workbook


# Open the BizeGelen file and apply highlighting
wb1 = _highlight_corrected_rows("BizeGelen_Guncel1.xlsx", df1_updated, duzeltmeler_bizegelen, yellow_fill)
ws1 = wb1.active

# Open the Mat file and apply highlighting
wb2 = _highlight_corrected_rows("Mat_Guncel1.xlsx", df2_updated, duzeltmeler_sertifika, yellow_fill)
ws2 = wb2.active

print("Stage 3: Corrected rows have been marked in yellow.")


In [None]:
import pandas as pd

# -------------------- STAGE 4: RE-CHECK THE UPDATED FILES --------------------
# Load the updated normalized files
df1_final = pd.read_excel("BizeGelen_Guncel1.xlsx")
df2_final = pd.read_excel("Mat_Guncel1.xlsx")

# Sample ID sets
set1 = set(df1_final["numune isareti"])
set2 = set(df2_final["numune isareti"])

common_samples = set1.intersection(set2)

# CONDITION 1: samples that exist only in BizeGelen
only_in_df1 = df1_final[df1_final["numune isareti"].isin(set1 - set2)]

# CONDITION 2: samples that exist only in Certificate
only_in_df2 = df2_final[df2_final["numune isareti"].isin(set2 - set1)]

# CONDITIONS 3 and 4: samples that are common but have differing 'results'
df1_common = df1_final[df1_final["numune isareti"].isin(common_samples)].copy()
df2_common = df2_final[df2_final["numune isareti"].isin(common_samples)].copy()

merged = pd.merge(df1_common, df2_common, on="numune isareti", suffixes=('_bizegelen', '_sertifika'))

# Empty result cells can come back from Excel as NaN, and NaN != NaN evaluates
# to True. Rows where BOTH results are missing are excluded explicitly;
# otherwise they would always count as remaining differences.
both_missing = merged["sonuclar_bizegelen"].isna() & merged["sonuclar_sertifika"].isna()
results_differ = merged["sonuclar_bizegelen"] != merged["sonuclar_sertifika"]
farkli_sonuclar = merged[results_differ & ~both_missing]

# Corresponding versions of differing rows for each file
farkli_df1 = farkli_sonuclar[["numune isareti", "sonuclar_bizegelen"]].rename(columns={"sonuclar_bizegelen": "sonuclar"})
farkli_df2 = farkli_sonuclar[["numune isareti", "sonuclar_sertifika"]].rename(columns={"sonuclar_sertifika": "sonuclar"})

# Conditions 1 and 3: BizeGelen side
combined_df1 = pd.concat([only_in_df1, farkli_df1], ignore_index=True)

# Conditions 2 and 4: Certificate side
combined_df2 = pd.concat([only_in_df2, farkli_df2], ignore_index=True)

# Single source of truth for the report name so the message matches the file written.
kalan_farklar_path = "xrd_kalan_farklar.xlsx"

# If there are no differences, write an informational sheet instead of empty tables
if combined_df1.empty and combined_df2.empty:
    print("Stage 4: No differences remain in the updated files.")
    mesaj_df = pd.DataFrame(["No differences remain in the updated files."], columns=["Bilgi"])
    mesaj_df.to_excel(kalan_farklar_path, index=False)
else:
    with pd.ExcelWriter(kalan_farklar_path) as writer:
        combined_df1.to_excel(writer, sheet_name="Sadece_BizeGelen", index=False)
        combined_df2.to_excel(writer, sheet_name="Sadece_Sertifika", index=False)
    # Reported only after the workbook has actually been written.
    print(f"Stage 4: Differences were detected in the updated files and saved to '{kalan_farklar_path}'.")
