In [1]:
import csv
import uuid

# Row accumulators: one list of dicts per output table. Every dict is one
# output row keyed by the output column name.
applicant_data = []
family_data = []
highschool_data = []
university_data = []
applicantuniversity_data = []
scholarship_data = []
language_data = []
activities_data = []

# Next ID to hand out for each de-duplicated lookup table (IDs start at 1).
family_id_counter = 1
highschool_id_counter = 1
university_id_counter = 1

# De-duplication maps: tuple of source column values -> ID already assigned.
# When the same tuple shows up again, the existing ID is reused and no new
# row is appended to the corresponding *_data list.
family_map = {}
highschool_map = {}
university_map = {}

# Next ID for the tables that receive exactly one row per input row.
applicant_id_counter = 1
activities_id_counter = 1
language_id_counter = 1
scholarship_id_counter = 1
applicant_univ_id_counter = 1

# 1) Open the main CSV with the big dataset.
# newline="" is what the csv module documentation prescribes for file objects
# handed to a reader: without it, line breaks embedded in quoted fields can be
# altered by universal-newline translation before the csv parser sees them.
with open("data.csv", mode="r", newline="", encoding="utf-8") as infile:
    # The first line of the file is consumed as the header; each following
    # line is yielded as a dict keyed by those header names.
    reader = csv.DictReader(infile)

    for row in reader:

        # ----------------------------------------------------------
        #  FAMILY TABLE
        # ----------------------------------------------------------
        # Two input rows share a FamilyID only if all seven values match.
        family_key = (
            row["Anne Egitim Durumu"],
            row["Anne Calisma Durumu"],
            row["Anne Sektor"],
            row["Baba Egitim Durumu"],
            row["Baba Calisma Durumu"],
            row["Baba Sektor"],
            row["Kardes Sayisi"]
        )
        if family_key not in family_map:
            # First occurrence: allocate a new ID and emit a family row.
            current_family_id = family_id_counter
            family_id_counter += 1

            family_map[family_key] = current_family_id
            family_data.append({
                "FamilyID": current_family_id,
                "MotherEducationLevel": row["Anne Egitim Durumu"],
                "MotherEmploymentStatus": row["Anne Calisma Durumu"],
                "MotherSector": row["Anne Sektor"],
                "FatherEducationLevel": row["Baba Egitim Durumu"],
                "FatherEmploymentStatus": row["Baba Calisma Durumu"],
                "FatherSector": row["Baba Sektor"],
                "NumberOfSiblings": row["Kardes Sayisi"]
            })
        else:
            current_family_id = family_map[family_key]

        # ----------------------------------------------------------
        #  HIGHSCHOOL TABLE
        # ----------------------------------------------------------
        # NOTE(review): the key includes "Lise Mezuniyet Notu", so the same
        # school with a different graduation grade gets a separate
        # HighSchoolID -- confirm this matches the intended schema.
        hs_key = (
            row["Lise Adi"],
            row["Lise Adi Diger"],
            row["Lise Sehir"],
            row["Lise Turu"],
            row["Lise Bolumu"],
            row["Lise Bolum Diger"],
            row["Lise Mezuniyet Notu"]
        )
        if hs_key not in highschool_map:
            current_highschool_id = highschool_id_counter
            highschool_id_counter += 1

            highschool_map[hs_key] = current_highschool_id
            highschool_data.append({
                "HighSchoolID": current_highschool_id,
                "HighSchoolName": row["Lise Adi"],
                "OtherHighSchoolName": row["Lise Adi Diger"],
                "HighSchoolCity": row["Lise Sehir"],
                "HighSchoolType": row["Lise Turu"],
                "HighSchoolDepartment": row["Lise Bolumu"],
                "OtherHighSchoolDepartment": row["Lise Bolum Diger"],
                "HighSchoolGraduationGrade": row["Lise Mezuniyet Notu"]
            })
        else:
            current_highschool_id = highschool_map[hs_key]

        # ----------------------------------------------------------
        #  UNIVERSITY TABLE
        # ----------------------------------------------------------
        # A university is identified by its (name, type) pair.
        uni_key = (row["Universite Adi"], row["Universite Turu"])
        if uni_key not in university_map:
            current_university_id = university_id_counter
            university_id_counter += 1

            university_map[uni_key] = current_university_id
            university_data.append({
                "UniversityID": current_university_id,
                "UniversityName": row["Universite Adi"],
                "UniversityType": row["Universite Turu"]
            })
        else:
            current_university_id = university_map[uni_key]

        # ----------------------------------------------------------
        #  APPLICANT TABLE
        # ----------------------------------------------------------
        # Every input row is one applicant; no de-duplication here.
        current_applicant_id = applicant_id_counter
        applicant_id_counter += 1

        applicant_data.append({
            "ApplicantID": current_applicant_id,
            "ApplicationYear": row["Basvuru Yili"],
            "EvaluationScore": row["Degerlendirme Puani"],
            "Gender": row["Cinsiyet"],
            "DateOfBirth": row["Dogum Tarihi"],
            "PlaceOfBirth": row["Dogum Yeri"],
            "ResidenceCity": row["Ikametgah Sehri"],
            "FamilyID": current_family_id,
            "HighSchoolID": current_highschool_id
        })

        # ----------------------------------------------------------
        #  APPLICANTUNIVERSITY TABLE
        # ----------------------------------------------------------
        # Link row between the applicant and the university resolved above.
        current_applicantuniversity_id = applicant_univ_id_counter
        applicant_univ_id_counter += 1

        applicantuniversity_data.append({
            "ApplicantUniversityID": current_applicantuniversity_id,
            "ApplicantID": current_applicant_id,
            "UniversityID": current_university_id,
            "Major": row["Bölüm"],
            "UniversityYear": row["Universite Kacinci Sinif"],
            "UniversityGPA": row["Universite Not Ortalamasi"],
            "GraduatedFromAnotherUniversity": row["Daha Once Baska Bir Universiteden Mezun Olmus"],
            "PreviousUniversity": row["Daha Önceden Mezun Olunduysa, Mezun Olunan Üniversite"]
        })

        # ----------------------------------------------------------
        #  SCHOLARSHIP TABLE
        # ----------------------------------------------------------
        current_scholarship_id = scholarship_id_counter
        scholarship_id_counter += 1

        scholarship_data.append({
            "ScholarshipID": current_scholarship_id,
            "ApplicantID": current_applicant_id,
            "ScholarshipPercentage": row["Burslu ise Burs Yuzdesi"],
            "IsReceivingScholarship": row["Burs Aliyor mu?"],
            "OtherInstitutionName": row["Burs Aldigi Baska Kurum"],
            "OtherInstitutionScholarshipAmount": row["Baska Kurumdan Aldigi Burs Miktari"]
            # If you need an extra boolean for "Baska Bir Kurumdan Burs Aliyor mu?"
            # you can add it here as well.
        })

        # ----------------------------------------------------------
        #  LANGUAGE TABLE
        # ----------------------------------------------------------
        # Only the two English-related columns are carried over.
        current_language_id = language_id_counter
        language_id_counter += 1

        language_data.append({
            "LanguageID": current_language_id,
            "ApplicantID": current_applicant_id,
            "KnowsEnglish": row["Ingilizce Biliyor musunuz?"],
            "EnglishProficiency": row["Ingilizce Seviyeniz?"]
        })

        # ----------------------------------------------------------
        #  ACTIVITIES TABLE
        # ----------------------------------------------------------
        current_activities_id = activities_id_counter
        activities_id_counter += 1

        activities_data.append({
            "ActivitiesID": current_activities_id,
            "ApplicantID": current_applicant_id,
            "EntrepreneurshipClubMember": row["Girisimcilik Kulupleri Tarzi Bir Kulube Uye misiniz?"],
            "ClubName": row["Uye Oldugunuz Kulubun Ismi"],
            "ProfessionalSport": row["Profesyonel Bir Spor Daliyla Mesgul musunuz?"],
            "RoleInSport": row["Spor Dalindaki Rolunuz Nedir?"],
            "NGO_Member": row["Aktif olarak bir STK üyesi misiniz?"],
            "NGO_Name": row["Hangi STK'nin Uyesisiniz?"],
            "ParticipatedInNGOProject": row["Stk Projesine Katildiniz Mi?"],
            "EntrepreneurshipExperience": row["Girisimcilikle Ilgili Deneyiminiz Var Mi?"],
            "EntrepreneurshipExperienceDetails": row["Girisimcilikle Ilgili Deneyiminizi Aciklayabilir misiniz?"]
        })

        
# ------------------------------------------------------------------
# 3) Write Each Table’s Data into a Separate CSV File
# ------------------------------------------------------------------

# Helper to write out CSVs
def write_csv(filename, fieldnames, data):
    """Write ``data`` to ``filename`` as a UTF-8 CSV file with a header row.

    Args:
        filename: Path of the file to create or truncate.
        fieldnames: Column names; written as the header and used as the
            column order for every row.
        data: Iterable of dicts, one per row, keyed by the names in
            ``fieldnames``.
    """
    # newline="" leaves line-ending handling to the csv writer.
    with open(filename, "w", encoding="utf-8", newline="") as sink:
        table = csv.DictWriter(sink, fieldnames=fieldnames)
        table.writeheader()
        for record in data:
            table.writerow(record)

# Export plan: (target file, column order, accumulated rows) for every table,
# listed in the order the files are written.
table_exports = [
    (
        "applicant.csv",
        ["ApplicantID", "ApplicationYear", "EvaluationScore", "Gender",
         "DateOfBirth", "PlaceOfBirth", "ResidenceCity", "FamilyID",
         "HighSchoolID"],
        applicant_data,
    ),
    (
        "family.csv",
        ["FamilyID", "MotherEducationLevel", "MotherEmploymentStatus",
         "MotherSector", "FatherEducationLevel", "FatherEmploymentStatus",
         "FatherSector", "NumberOfSiblings"],
        family_data,
    ),
    (
        "highschool.csv",
        ["HighSchoolID", "HighSchoolName", "OtherHighSchoolName",
         "HighSchoolCity", "HighSchoolType", "HighSchoolDepartment",
         "OtherHighSchoolDepartment", "HighSchoolGraduationGrade"],
        highschool_data,
    ),
    (
        "university.csv",
        ["UniversityID", "UniversityName", "UniversityType"],
        university_data,
    ),
    (
        "applicantuniversity.csv",
        ["ApplicantUniversityID", "ApplicantID", "UniversityID", "Major",
         "UniversityYear", "UniversityGPA", "GraduatedFromAnotherUniversity",
         "PreviousUniversity"],
        applicantuniversity_data,
    ),
    (
        "scholarship.csv",
        ["ScholarshipID", "ApplicantID", "ScholarshipPercentage",
         "IsReceivingScholarship", "OtherInstitutionName",
         "OtherInstitutionScholarshipAmount"],
        scholarship_data,
    ),
    (
        "language.csv",
        ["LanguageID", "ApplicantID", "KnowsEnglish", "EnglishProficiency"],
        language_data,
    ),
    (
        "activities.csv",
        ["ActivitiesID", "ApplicantID", "EntrepreneurshipClubMember",
         "ClubName", "ProfessionalSport", "RoleInSport", "NGO_Member",
         "NGO_Name", "ParticipatedInNGOProject", "EntrepreneurshipExperience",
         "EntrepreneurshipExperienceDetails"],
        activities_data,
    ),
]

# One write_csv call per table, in the order listed above.
for csv_name, column_order, table_rows in table_exports:
    write_csv(csv_name, column_order, table_rows)

print("Done! Separate CSVs have been created.")


Done! Separate CSVs have been created.


In [14]:
import pandas as pd

# Copy a subset of columns from one CSV file into a new CSV file.
def filter_and_save_csv(input_file, output_file, selected_columns=None):
    """Write the chosen columns of ``input_file`` to ``output_file``.

    Parameters
    ----------
    input_file : str or path-like
        CSV file to read.
    output_file : str or path-like
        CSV file to write; it is written without the DataFrame index.
    selected_columns : sequence of str, optional
        Column names to keep, in the order they should appear in the output.
        When omitted, the two English-language columns
        ("Ingilizce Biliyor musunuz?", "Ingilizce Seviyeniz?") are used.

    Returns
    -------
    None
        Success or failure is reported with ``print``; exceptions raised while
        reading or writing are caught and printed, not propagated.
    """
    if selected_columns is None:
        selected_columns = ["Ingilizce Biliyor musunuz?", "Ingilizce Seviyeniz?"]
    else:
        # Materialise once so any iterable can be used for both steps below.
        selected_columns = list(selected_columns)

    try:
        # Parse only the requested columns instead of the whole file; the
        # remaining columns would be thrown away immediately anyway.
        df = pd.read_csv(input_file, usecols=selected_columns)

        # usecols keeps the file's column order, so re-index explicitly to get
        # the requested order.
        filtered_data = df[selected_columns]

        # Write the filtered data to the new CSV file.
        filtered_data.to_csv(output_file, index=False)
        print(f"Filtrelenmiş veri başarıyla {output_file} dosyasına yazıldı.")
    except Exception as e:
        # Best-effort helper: report the problem and carry on.
        print(f"Bir hata oluştu: {e}")

# Example usage
# NOTE(review): 'language.csv' is also the file name the first cell passes to
# write_csv for the language table (LanguageID, ApplicantID, KnowsEnglish,
# EnglishProficiency). This call presumably overwrites that file with only the
# two raw source columns -- confirm this is intended.
input_file = 'data.csv'  # Input CSV file
output_file = 'language.csv'  # Output CSV file

filter_and_save_csv(input_file, output_file)

Filtrelenmiş veri başarıyla language.csv dosyasına yazıldı.


  df = pd.read_csv(input_file)


In [33]:
import pandas as pd

# Read a CSV file, prepend a sequential ID column, and save the result.
def add_family_id_and_save(input_file, output_file, id_column="2ass2"):
    """Prepend a 1-based sequential ID column to a CSV file.

    Parameters
    ----------
    input_file : str or path-like
        CSV file to read.
    output_file : str or path-like
        CSV file to write; it is written without the DataFrame index.
    id_column : str, optional
        Name of the inserted column. The default, "2ass2", is the name this
        function has always written and is kept only so existing calls
        produce the same file.
        NOTE(review): it looks like a placeholder -- pass the real key name
        (e.g. "FamilyID") explicitly.

    Returns
    -------
    None
        Success or failure is reported with ``print``; exceptions (including
        the one ``DataFrame.insert`` raises when ``id_column`` already exists)
        are caught and printed, not propagated.
    """
    try:
        df = pd.read_csv(input_file)

        # New first column holding 1, 2, ..., len(df).
        df.insert(0, id_column, range(1, len(df) + 1))

        df.to_csv(output_file, index=False)
        # Report the column that was actually inserted; the message used to
        # say "FamilyID" regardless of the real column name.
        print(f"{id_column} sütunu eklendi ve sonuç başarıyla {output_file} dosyasına kaydedildi.")
    except Exception as e:
        # Best-effort helper: report the problem and carry on.
        print(f"Bir hata oluştu: {e}")

# Example usage
# NOTE(review): the helper is named for the family table but is applied here
# to the applicationuniversity_* files -- confirm which ID column is wanted.
input_file = 'applicationuniversity_new2.csv'  # Input CSV file
output_file = 'applicationuniversity_new3.csv'  # Output CSV file

add_family_id_and_save(input_file, output_file)


FamilyID sütunu eklendi ve sonuç başarıyla applicationuniversity_new3.csv dosyasına kaydedildi.
