# **Importing Libraries**

In [7]:
import csv
import pandas as pd
import numpy as np
import re
from collections import Counter
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Input, LSTM, Dense, Embedding
from tensorflow.keras.models import Model

# **Creating the CSV file**

In [8]:
# Define a list of dictionaries containing metadata, COBOL code, and its Java translation
cobol_java_pairs = [
    {
        "ID": 1,
        "Domain": "General",
        "Purpose": "HelloWorld",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. HelloWorld.
                        PROCEDURE DIVISION.
                        P1.
                            DISPLAY "Hello, World!".
                            STOP RUN.""",
        "Java Translation": """public class HelloWorld {
                                  public static void main(String[] args) {
                                      System.out.println("Hello, World!");
                                  }
                              }"""
    },
    {
        "ID": 2,
        "Domain": "Arithmetic",
        "Purpose": "AddNumbers",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. AddNumbers.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 A PIC 9(2) VALUE 10.
                        01 B PIC 9(2) VALUE 20.
                        01 SUM PIC 9(4).
                        PROCEDURE DIVISION.
                        P1.
                            COMPUTE SUM = A + B.
                            DISPLAY "SUM=", SUM.
                            STOP RUN.""",
        "Java Translation": """public class AddNumbers {
                                  public static void main(String[] args) {
                                      int A = 10;
                                      int B = 20;
                                      int SUM;
                                      SUM = A + B;
                                      System.out.println("SUM=" + SUM);
                                  }
                              }"""
    },
    {
        "ID": 3,
        "Domain": "Arithmetic",
        "Purpose": "LargestNumber",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. LargestNumber.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 A PIC 9(2) VALUE 10.
                        01 B PIC 9(2) VALUE 20.
                        01 MAX PIC 9(2).
                        PROCEDURE DIVISION.
                        P1.
                            IF A > B THEN
                                MOVE A TO MAX
                            ELSE
                                MOVE B TO MAX
                            END-IF.
                            DISPLAY "MAX=", MAX.
                            STOP RUN.""",
        "Java Translation": """public class LargestNumber {
                                  public static void main(String[] args) {
                                      int A = 10;
                                      int B = 20;
                                      int MAX;
                                      if (A > B) {
                                          MAX = A;
                                      } else {
                                          MAX = B;
                                      }
                                      System.out.println("MAX=" + MAX);
                                  }
                              }"""
    },
    {
        "ID": 4,
        "Domain": "Mathematical",
        "Purpose": "Factorial",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. Factorial.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 N PIC 9(2) VALUE 5.
                        01 FACTORIAL PIC 9(10).
                        PROCEDURE DIVISION.
                        P1.
                            COMPUTE FACTORIAL = 1.
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > N
                                COMPUTE FACTORIAL = FACTORIAL * I
                            END-PERFORM.
                            DISPLAY "FACTORIAL=", FACTORIAL.
                            STOP RUN.""",
        "Java Translation": """public class Factorial {
                                  public static void main(String[] args) {
                                      int N = 5;
                                      long FACTORIAL = 1;
                                      for (int I = 1; I <= N; I++) {
                                          FACTORIAL *= I;
                                      }
                                      System.out.println("FACTORIAL=" + FACTORIAL);
                                  }
                              }"""
    },
    {
        "ID": 5,
        "Domain": "Mathematical",
        "Purpose": "FibonacciSeries",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. FibonacciSeries.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 N PIC 9(2) VALUE 10.
                        01 FIB1 PIC 9(10) VALUE 0.
                        01 FIB2 PIC 9(10) VALUE 1.
                        01 NEXTFIB PIC 9(10).
                        PROCEDURE DIVISION.
                        P1.
                            DISPLAY FIB1.
                            DISPLAY FIB2.
                            PERFORM UNTIL N <= 2
                                COMPUTE NEXTFIB = FIB1 + FIB2
                                DISPLAY NEXTFIB
                                COMPUTE FIB1 = FIB2
                                COMPUTE FIB2 = NEXTFIB
                                COMPUTE N = N - 1
                            END-PERFORM.
                            STOP RUN.""",
        "Java Translation": """public class FibonacciSeries {
                                  public static void main(String[] args) {
                                      int N = 10;
                                      int FIB1 = 0;
                                      int FIB2 = 1;
                                      int NEXTFIB;
                                      System.out.println(FIB1);
                                      System.out.println(FIB2);
                                      while (N > 2) {
                                          NEXTFIB = FIB1 + FIB2;
                                          System.out.println(NEXTFIB);
                                          FIB1 = FIB2;
                                          FIB2 = NEXTFIB;
                                          N--;
                                      }
                                  }
                              }"""
    },
    {
        "ID": 6,
        "Domain": "File Handling",
        "Purpose": "ReadFile",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. ReadFile.
                        ENVIRONMENT DIVISION.
                        INPUT-OUTPUT SECTION.
                        FILE-CONTROL.
                            SELECT InputFile ASSIGN TO 'input.txt'.
                        DATA DIVISION.
                        FILE SECTION.
                        FD InputFile.
                        01 InputRecord PIC X(50).
                        WORKING-STORAGE SECTION.
                        01 EOF-SWITCH PIC X VALUE 'N'.
                        PROCEDURE DIVISION.
                        P1.
                            OPEN INPUT InputFile
                            READ InputFile INTO InputRecord
                            AT END MOVE 'Y' TO EOF-SWITCH
                            END-READ
                            PERFORM UNTIL EOF-SWITCH = 'Y'
                                DISPLAY InputRecord
                                READ InputFile INTO InputRecord
                                AT END MOVE 'Y' TO EOF-SWITCH
                                END-READ
                            END-PERFORM
                            CLOSE InputFile
                            STOP RUN.""",
        "Java Translation": """import java.io.BufferedReader;
                                import java.io.FileReader;
                                import java.io.IOException;

                                public class ReadFile {
                                    public static void main(String[] args) {
                                        try {
                                            BufferedReader reader = new BufferedReader(new FileReader("input.txt"));
                                            String line;
                                            while ((line = reader.readLine()) != null) {
                                                System.out.println(line);
                                            }
                                            reader.close();
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                }"""
    },
    {
        "ID": 7,
        "Domain": "String Manipulation",
        "Purpose": "ReverseString",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. ReverseString.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 InputString PIC X(50) VALUE 'Hello, World!'.
                        01 OutputString PIC X(50).
                        01 I PIC 9(3) VALUE 1.
                        01 J PIC 9(3) VALUE 1.
                        PROCEDURE DIVISION.
                        P1.
                            MOVE Function REVERSE(InputString) TO OutputString
                            DISPLAY OutputString
                            STOP RUN.
                        FUNCTION REVERSE(A) RETURNS B
                            LOCAL A, B.
                            PERFORM VARYING I FROM LENGTH OF A BY -1 UNTIL I = 0
                                MOVE FUNCTION SUBSTRING(A, I, 1) TO B(I)
                            END-PERFORM
                            SET B TO FUNCTION TRIM(B)
                        END FUNCTION.""",
        "Java Translation": """public class ReverseString {
                                    public static void main(String[] args) {
                                        String inputString = "Hello, World!";
                                        StringBuilder outputString = new StringBuilder();
                                        for (int i = inputString.length() - 1; i >= 0; i--) {
                                            outputString.append(inputString.charAt(i));
                                        }
                                        System.out.println(outputString);
                                    }
                                }"""
    },
    {
        "ID": 8,
        "Domain": "Sorting",
        "Purpose": "BubbleSort",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. BubbleSort.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 Array OCCURS 10 TIMES PIC 9(3) VALUE 99, 88, 77, 66, 55, 44, 33, 22, 11, 00.
                        01 N PIC 9(3) VALUE 10.
                        01 I PIC 9(3).
                        01 J PIC 9(3).
                        01 TEMP PIC 9(3).
                        PROCEDURE DIVISION.
                        P1.
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > N - 1
                                PERFORM VARYING J FROM 1 BY 1 UNTIL J > N - I
                                    IF Array(J) > Array(J + 1)
                                        MOVE Array(J) TO TEMP
                                        MOVE Array(J + 1) TO Array(J)
                                        MOVE TEMP TO Array(J + 1)
                                    END-IF
                                END-PERFORM
                            END-PERFORM
                            DISPLAY "Sorted Array:"
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I <= N
                                DISPLAY Array(I)
                            END-PERFORM
                            STOP RUN.""",
        "Java Translation": """public class BubbleSort {
                                    public static void main(String[] args) {
                                        int[] array = {99, 88, 77, 66, 55, 44, 33, 22, 11, 00};
                                        int n = array.length;
                                        int temp;
                                        for (int i = 0; i < n - 1; i++) {
                                            for (int j = 0; j < n - i - 1; j++) {
                                                if (array[j] > array[j + 1]) {
                                                    temp = array[j];
                                                    array[j] = array[j + 1];
                                                    array[j + 1] = temp;
                                                }
                                            }
                                        }
                                        System.out.println("Sorted Array:");
                                        for (int num : array) {
                                            System.out.println(num);
                                        }
                                    }
                                }"""
    },
    {
        "ID": 9,
        "Domain": "Data Structures",
        "Purpose": "Linked List",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. LinkedList.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 Node.
                            02 Data PIC X(10).
                            02 NextPointer POINTER.
                        01 HeadPointer POINTER.
                        PROCEDURE DIVISION.
                        P1.
                            CALL "INIT_LIST"
                            CALL "INSERT" USING "Data1"
                            CALL "INSERT" USING "Data2"
                            CALL "INSERT" USING "Data3"
                            CALL "DISPLAY_LIST"
                            CALL "DELETE" USING "Data2"
                            CALL "DISPLAY_LIST"
                            STOP RUN.
                        INIT_LIST.
                            SET HeadPointer TO NULL
                            EXIT PROGRAM.
                        INSERT USING Item.
                            SET Node TO ADDRESS OF Node
                            MOVE Item TO Data OF Node
                            MOVE HeadPointer TO NextPointer OF Node
                            SET HeadPointer TO Node
                            EXIT PROGRAM.
                        DISPLAY_LIST.
                            SET Node TO HeadPointer
                            PERFORM UNTIL Node = NULL
                                DISPLAY Data OF Node
                                SET Node TO NextPointer OF Node
                            END-PERFORM
                            EXIT PROGRAM.
                        DELETE USING Item.
                            SET Node TO HeadPointer
                            SET PrevNode TO NULL
                            PERFORM UNTIL Node = NULL
                                IF Data OF Node = Item
                                    IF PrevNode = NULL
                                        SET HeadPointer TO NextPointer OF Node
                                    ELSE
                                        SET NextPointer OF PrevNode TO NextPointer OF Node
                                    END-IF
                                    EXIT PROGRAM
                                END-IF
                                SET PrevNode TO Node
                                SET Node TO NextPointer OF Node
                            END-PERFORM
                            EXIT PROGRAM.""",
        "Java Translation": """public class LinkedList {
                                    static class Node {
                                        String data;
                                        Node next;

                                        Node(String data) {
                                            this.data = data;
                                            this.next = null;
                                        }
                                    }

                                    Node head;

                                    LinkedList() {
                                        this.head = null;
                                    }

                                    void insert(String item) {
                                        Node newNode = new Node(item);
                                        newNode.next = head;
                                        head = newNode;
                                    }

                                    void display() {
                                        Node current = head;
                                        while (current != null) {
                                            System.out.println(current.data);
                                            current = current.next;
                                        }
                                    }

                                    void delete(String item) {
                                        Node current = head;
                                        Node prev = null;
                                        while (current != null) {
                                            if (current.data.equals(item)) {
                                                if (prev == null) {
                                                    head = current.next;
                                                } else {
                                                    prev.next = current.next;
                                                }
                                                return;
                                            }
                                            prev = current;
                                            current = current.next;
                                        }
                                    }

                                    public static void main(String[] args) {
                                        LinkedList list = new LinkedList();
                                        list.insert("Data1");
                                        list.insert("Data2");
                                        list.insert("Data3");
                                        list.display();
                                        list.delete("Data2");
                                        list.display();
                                    }
                                }"""
    },
    {
        "ID": 10,
        "Domain": "Networking",
        "Purpose": "TCP Client",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. TCPClient.
                        ENVIRONMENT DIVISION.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 Server-IP PIC X(15) VALUE "127.0.0.1".
                        01 PortNumber PIC 9(4) VALUE 8080.
                        01 Client-Socket USAGE POINTER.
                        01 Buffer PIC X(255).
                        PROCEDURE DIVISION.
                        P1.
                            CALL "CONNECT_TO_SERVER" USING Server-IP PortNumber
                            CALL "SEND_DATA" USING "Hello, Server!"
                            CALL "RECEIVE_DATA"
                            CALL "CLOSE_CONNECTION"
                            STOP RUN.
                        CONNECT_TO_SERVER USING ServerIP PortNo.
                            CALL "WSOCK32.DLL" USING BY VALUE 2, BY REFERENCE Client-Socket
                            CALL "WSOCK32.DLL" USING BY VALUE 20, BY VALUE ServerIP, BY VALUE PortNo, BY VALUE Client-Socket
                            EXIT PROGRAM.
                        SEND_DATA USING Data.
                            STRING Data DELIMITED BY SIZE INTO Buffer
                            CALL "WSOCK32.DLL" USING BY VALUE 1, BY VALUE Buffer, BY VALUE LENGTH OF Buffer, BY VALUE Client-Socket
                            EXIT PROGRAM.
                        RECEIVE_DATA.
                            CALL "WSOCK32.DLL" USING BY VALUE 0, BY VALUE Buffer, BY VALUE LENGTH OF Buffer, BY VALUE Client-Socket
                            DISPLAY Buffer
                            EXIT PROGRAM.
                        CLOSE_CONNECTION.
                            CALL "WSOCK32.DLL" USING BY VALUE 6, BY VALUE Client-Socket
                            EXIT PROGRAM.""",
        "Java Translation": """import java.io.*;
                                import java.net.*;

                                public class TCPClient {
                                    public static void main(String[] args) {
                                        try {
                                            String serverIP = "127.0.0.1";
                                            int portNumber = 8080;
                                            Socket clientSocket = new Socket(serverIP, portNumber);
                                            PrintWriter out = new PrintWriter(clientSocket.getOutputStream(), true);
                                            BufferedReader in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));

                                            out.println("Hello, Server!");
                                            String response = in.readLine();
                                            System.out.println(response);

                                            out.close();
                                            in.close();
                                            clientSocket.close();
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                }"""
    },
    {
        "ID": 11,
        "Domain": "Database Management",
        "Purpose": "CRUD Operations",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. CRUDOperations.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 EmployeeRecord.
                            02 EmployeeID PIC X(5).
                            02 EmployeeName PIC X(30).
                            02 EmployeeAge PIC 99.
                        01 DatabaseFile.
                            02 DatabaseRecord OCCURS 100 TIMES DEPENDING ON EmployeeCount.
                                03 EmployeeID PIC X(5).
                                03 EmployeeName PIC X(30).
                                03 EmployeeAge PIC 99.
                        01 EmployeeCount PIC 99 VALUE 0.
                        PROCEDURE DIVISION.
                        P1.
                            DISPLAY "1. Add Employee"
                            DISPLAY "2. View Employees"
                            DISPLAY "3. Update Employee"
                            DISPLAY "4. Delete Employee"
                            DISPLAY "5. Exit"
                            ACCEPT Choice
                            EVALUATE Choice
                                WHEN 1
                                    CALL "ADD_EMPLOYEE"
                                WHEN 2
                                    CALL "VIEW_EMPLOYEES"
                                WHEN 3
                                    CALL "UPDATE_EMPLOYEE"
                                WHEN 4
                                    CALL "DELETE_EMPLOYEE"
                                WHEN 5
                                    STOP RUN
                                WHEN OTHER
                                    DISPLAY "Invalid Choice"
                            END-EVALUATE
                            GO TO P1.
                        ADD_EMPLOYEE.
                            ACCEPT EmployeeID FROM STDIN
                            ACCEPT EmployeeName FROM STDIN
                            ACCEPT EmployeeAge FROM STDIN
                            ADD 1 TO EmployeeCount
                            MOVE EmployeeID TO EmployeeRecord(EmployeeCount)
                            MOVE EmployeeName TO EmployeeRecord(EmployeeCount)
                            MOVE EmployeeAge TO EmployeeRecord(EmployeeCount)
                            EXIT PROGRAM.
                        VIEW_EMPLOYEES.
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > EmployeeCount
                                DISPLAY DatabaseRecord(I)
                            END-PERFORM
                            EXIT PROGRAM.
                        UPDATE_EMPLOYEE.
                            ACCEPT EmployeeID FROM STDIN
                            ACCEPT EmployeeName FROM STDIN
                            ACCEPT EmployeeAge FROM STDIN
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > EmployeeCount
                                IF EmployeeRecord(I) = EmployeeID
                                    MOVE EmployeeName TO DatabaseRecord(I)
                                    MOVE EmployeeAge TO DatabaseRecord(I)
                                    EXIT PROGRAM
                                END-IF
                            END-PERFORM
                            EXIT PROGRAM.
                        DELETE_EMPLOYEE.
                            ACCEPT EmployeeID FROM STDIN
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > EmployeeCount
                                IF EmployeeRecord(I) = EmployeeID
                                    UNSTRING DatabaseRecord(I) INTO EmployeeID, EmployeeName, EmployeeAge
                                    SUBTRACT 1 FROM EmployeeCount
                                    EXIT PROGRAM
                                END-IF
                            END-PERFORM
                            EXIT PROGRAM.""",
        "Java Translation": """import java.util.ArrayList;
                                import java.util.Scanner;

                                public class CRUDOperations {
                                    static class Employee {
                                        String employeeID;
                                        String employeeName;
                                        int employeeAge;

                                        Employee(String employeeID, String employeeName, int employeeAge) {
                                            this.employeeID = employeeID;
                                            this.employeeName = employeeName;
                                            this.employeeAge = employeeAge;
                                        }
                                    }

                                    static ArrayList<Employee> database = new ArrayList<>();

                                    public static void main(String[] args) {
                                        Scanner scanner = new Scanner(System.in);
                                        while (true) {
                                            System.out.println("1. Add Employee");
                                            System.out.println("2. View Employees");
                                            System.out.println("3. Update Employee");
                                            System.out.println("4. Delete Employee");
                                            System.out.println("5. Exit");
                                            int choice = scanner.nextInt();
                                            switch (choice) {
                                                case 1:
                                                    addEmployee();
                                                    break;
                                                case 2:
                                                    viewEmployees();
                                                    break;
                                                case 3:
                                                    updateEmployee();
                                                    break;
                                                case 4:
                                                    deleteEmployee();
                                                    break;
                                                case 5:
                                                    System.exit(0);
                                                default:
                                                    System.out.println("Invalid Choice");
                                            }
                                        }
                                    }

                                    static void addEmployee() {
                                        Scanner scanner = new Scanner(System.in);
                                        System.out.print("Enter Employee ID: ");
                                        String employeeID = scanner.nextLine();
                                        System.out.print("Enter Employee Name: ");
                                        String employeeName = scanner.nextLine();
                                        System.out.print("Enter Employee Age: ");
                                        int employeeAge = scanner.nextInt();
                                        database.add(new Employee(employeeID, employeeName, employeeAge));
                                    }

                                    static void viewEmployees() {
                                        for (Employee employee : database) {
                                            System.out.println(employee.employeeID + " | " + employee.employeeName + " | " + employee.employeeAge);
                                        }
                                    }

                                    static void updateEmployee() {
                                        Scanner scanner = new Scanner(System.in);
                                        System.out.print("Enter Employee ID to Update: ");
                                        String employeeID = scanner.nextLine();
                                        for (Employee employee : database) {
                                            if (employee.employeeID.equals(employeeID)) {
                                                System.out.print("Enter New Employee Name: ");
                                                String employeeName = scanner.nextLine();
                                                System.out.print("Enter New Employee Age: ");
                                                int employeeAge = scanner.nextInt();
                                                employee.employeeName = employeeName;
                                                employee.employeeAge = employeeAge;
                                                return;
                                            }
                                        }
                                        System.out.println("Employee ID not found.");
                                    }

                                    static void deleteEmployee() {
                                        Scanner scanner = new Scanner(System.in);
                                        System.out.print("Enter Employee ID to Delete: ");
                                        String employeeID = scanner.nextLine();
                                        for (Employee employee : database) {
                                            if (employee.employeeID.equals(employeeID)) {
                                                database.remove(employee);
                                                return;
                                            }
                                        }
                                        System.out.println("Employee ID not found.");
                                    }
                                }"""
    },
        {
        "ID": 13,
        "Domain": "Multithreading",
        "Purpose": "Banking System",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. BankingSystem.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 AccountBalance PIC 99999 VALUE 10000.
                        01 WithdrawalAmount PIC 99999.
                        01 DepositAmount PIC 99999.
                        PROCEDURE DIVISION.
                        P1.
                            DISPLAY "Current Balance: " AccountBalance
                            ACCEPT WithdrawalAmount FROM STDIN
                            IF WithdrawalAmount <= AccountBalance
                                SUBTRACT WithdrawalAmount FROM AccountBalance GIVING AccountBalance
                                DISPLAY "Withdrawal Successful"
                            ELSE
                                DISPLAY "Insufficient Funds"
                            END-IF
                            ACCEPT DepositAmount FROM STDIN
                            ADD DepositAmount TO AccountBalance GIVING AccountBalance
                            DISPLAY "Deposit Successful"
                            STOP RUN.""",
        "Java Translation": """import java.util.Scanner;

                                public class BankingSystem {
                                    static int accountBalance = 10000;

                                    public static void main(String[] args) {
                                        Scanner scanner = new Scanner(System.in);
                                        while (true) {
                                            System.out.println("Current Balance: " + accountBalance);
                                            System.out.print("Enter Withdrawal Amount: ");
                                            int withdrawalAmount = scanner.nextInt();
                                            if (withdrawalAmount <= accountBalance) {
                                                accountBalance -= withdrawalAmount;
                                                System.out.println("Withdrawal Successful");
                                            } else {
                                                System.out.println("Insufficient Funds");
                                            }
                                            System.out.print("Enter Deposit Amount: ");
                                            int depositAmount = scanner.nextInt();
                                            accountBalance += depositAmount;
                                            System.out.println("Deposit Successful");
                                        }
                                    }
                                }"""
    },
    {
        "ID": 14,
        "Domain": "Machine Learning",
        "Purpose": "Linear Regression",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. LinearRegression.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 X-Values OCCURS 10 TIMES PIC 999 VALUE 1, 2, 3, 4, 5, 6, 7, 8, 9, 10.
                        01 Y-Values OCCURS 10 TIMES PIC 999 VALUE 5, 7, 9, 11, 13, 15, 17, 19, 21, 23.
                        01 X-Mean PIC 999.
                        01 Y-Mean PIC 999.
                        01 Sum-X-X-Mean PIC 999.
                        01 Sum-Y-Y-Mean PIC 999.
                        01 Sum-X-Y PIC 999.
                        01 Sum-X-Squared PIC 999.
                        01 B1 PIC 9(3)V99.
                        01 B0 PIC 9(3)V99.
                        PROCEDURE DIVISION.
                        P1.
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > 10
                                ADD X-Values(I) TO Sum-X-X-Mean
                                ADD Y-Values(I) TO Sum-Y-Y-Mean
                            END-PERFORM
                            COMPUTE X-Mean = Sum-X-X-Mean / 10
                            COMPUTE Y-Mean = Sum-Y-Y-Mean / 10
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > 10
                                COMPUTE Sum-X-Y = Sum-X-Y + ((X-Values(I) - X-Mean) * (Y-Values(I) - Y-Mean))
                                COMPUTE Sum-X-Squared = Sum-X-Squared + ((X-Values(I) - X-Mean) ** 2)
                            END-PERFORM
                            COMPUTE B1 = Sum-X-Y / Sum-X-Squared
                            COMPUTE B0 = Y-Mean - (B1 * X-Mean)
                            DISPLAY "Linear Regression Equation: Y = " B1 "X + " B0
                            STOP RUN.""",
        "Java Translation": """public class LinearRegression {
                                    public static void main(String[] args) {
                                        int[] xValues = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
                                        int[] yValues = {5, 7, 9, 11, 13, 15, 17, 19, 21, 23};
                                        int sumX = 0, sumY = 0, sumXY = 0, sumXSquared = 0;

                                        for (int i = 0; i < 10; i++) {
                                            sumX += xValues[i];
                                            sumY += yValues[i];
                                        }
                                        double xMean = sumX / 10.0;
                                        double yMean = sumY / 10.0;

                                        for (int i = 0; i < 10; i++) {
                                            sumXY += (xValues[i] - xMean) * (yValues[i] - yMean);
                                            sumXSquared += Math.pow(xValues[i] - xMean, 2);
                                        }

                                        double b1 = sumXY / sumXSquared;
                                        double b0 = yMean - (b1 * xMean);

                                        System.out.println("Linear Regression Equation: Y = " + b1 + "X + " + b0);
                                    }
                                }"""
    },
    {
        "ID": 15,
        "Domain": "Web Development",
        "Purpose": "Simple Web Server",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. SimpleWebServer.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 ServerSocket USAGE POINTER.
                        01 ClientSocket USAGE POINTER.
                        01 InStream USAGE POINTER.
                        01 OutStream USAGE POINTER.
                        01 Request PIC X(255).
                        01 Response PIC X(255).
                        PROCEDURE DIVISION.
                        P1.
                            CALL "CREATE_SERVER" USING BY REFERENCE ServerSocket
                            PERFORM UNTIL 1 = 0
                                CALL "ACCEPT_CONNECTION" USING BY REFERENCE ServerSocket BY REFERENCE ClientSocket
                                CALL "RECEIVE_REQUEST" USING BY REFERENCE ClientSocket BY REFERENCE InStream
                                CALL "PROCESS_REQUEST" USING BY REFERENCE Request BY REFERENCE Response
                                CALL "SEND_RESPONSE" USING BY REFERENCE ClientSocket BY REFERENCE OutStream
                                CALL "CLOSE_CONNECTION" USING BY REFERENCE ClientSocket
                            END-PERFORM
                            CALL "CLOSE_SERVER" USING BY REFERENCE ServerSocket
                            STOP RUN.
                        CREATE_SERVER USING Socket.
                            CALL "WSOCK32.DLL" USING BY VALUE 2, BY REFERENCE Socket
                            CALL "WSOCK32.DLL" USING BY VALUE 20, BY VALUE "127.0.0.1", BY VALUE 8080, BY REFERENCE Socket
                            EXIT PROGRAM.
                        ACCEPT_CONNECTION USING ServerSocket, ClientSocket.
                            CALL "WSOCK32.DLL" USING BY VALUE 3, BY REFERENCE ServerSocket, BY REFERENCE ClientSocket
                            EXIT PROGRAM.
                        RECEIVE_REQUEST USING ClientSocket, InStream.
                            CALL "WSOCK32.DLL" USING BY VALUE 0, BY REFERENCE Request, BY VALUE LENGTH OF Request, BY REFERENCE ClientSocket
                            EXIT PROGRAM.
                        PROCESS_REQUEST USING Request, Response.
                            MOVE "HTTP/1.1 200 OK" TO Response
                            STRING "Content-Type: text/html" DELIMITED BY SIZE INTO Response
                            STRING "" DELIMITED BY SIZE INTO Response
                            STRING "<html><body><h1>Hello, World!</h1></body></html>" DELIMITED BY SIZE INTO Response
                            EXIT PROGRAM.
                        SEND_RESPONSE USING ClientSocket, OutStream.
                            CALL "WSOCK32.DLL" USING BY VALUE 1, BY REFERENCE Response, BY VALUE LENGTH OF Response, BY REFERENCE ClientSocket
                            EXIT PROGRAM.
                        CLOSE_CONNECTION USING Socket.
                            CALL "WSOCK32.DLL" USING BY VALUE 6, BY REFERENCE Socket
                            EXIT PROGRAM.
                        CLOSE_SERVER USING Socket.
                            CALL "WSOCK32.DLL" USING BY VALUE 6, BY REFERENCE Socket
                            EXIT PROGRAM.""",
        "Java Translation": """import java.io.*;
                                import java.net.*;

                                public class SimpleWebServer {
                                    public static void main(String[] args) {
                                        try {
                                            ServerSocket serverSocket = new ServerSocket(8080);
                                            System.out.println("Server started");
                                            while (true) {
                                                Socket clientSocket = serverSocket.accept();
                                                System.out.println("Client connected: " + clientSocket.getInetAddress());

                                                BufferedReader inStream = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
                                                String request = inStream.readLine();
                                                System.out.println("Request: " + request);

                                                OutputStream outStream = clientSocket.getOutputStream();
                                                PrintWriter out = new PrintWriter(outStream, true);

                                                out.println("HTTP/1.1 200 OK");
                                                out.println("Content-Type: text/html");
                                                out.println("");
                                                out.println("<html><body><h1>Hello, World!</h1></body></html>");

                                                out.close();
                                                inStream.close();
                                                clientSocket.close();
                                            }
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                }"""
    },
    {
        "ID": 16,
        "Domain": "Artificial Intelligence",
        "Purpose": "Chess Game",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. ChessGame.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 Board OCCURS 64 TIMES.
                            02 Piece PIC X(2).
                        01 Player1Piece PIC X(2) VALUE "WP".
                        01 Player2Piece PIC X(2) VALUE "BP".
                        PROCEDURE DIVISION.
                        P1.
                            CALL "INITIALIZE_BOARD"
                            CALL "DISPLAY_BOARD"
                            CALL "MOVE" USING 7 6
                            CALL "DISPLAY_BOARD"
                            CALL "MOVE" USING 0 2
                            CALL "DISPLAY_BOARD"
                            STOP RUN.
                        INITIALIZE_BOARD.
                            MOVE " " TO Board
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > 8
                                MOVE Player1Piece TO Board(I)
                                MOVE Player1Piece TO Board(8 * 1 + I)
                                MOVE Player2Piece TO Board(8 * 7 + I)
                            END-PERFORM
                            EXIT PROGRAM.
                        DISPLAY_BOARD.
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > 64
                                DISPLAY Board(I)
                                IF I MOD 8 = 0
                                    DISPLAY ""
                                END-IF
                            END-PERFORM
                            EXIT PROGRAM.
                        MOVE USING FromSquare ToSquare.
                            MOVE Board(FromSquare) TO Board(ToSquare)
                            MOVE " " TO Board(FromSquare)
                            EXIT PROGRAM.""",
        "Java Translation": """public class ChessGame {
                                    static String[] board = new String[64];
                                    static String player1Piece = "WP";
                                    static String player2Piece = "BP";

                                    public static void main(String[] args) {
                                        initializeBoard();
                                        displayBoard();
                                        move(7, 6);
                                        displayBoard();
                                        move(0, 2);
                                        displayBoard();
                                    }

                                    static void initializeBoard() {
                                        for (int i = 0; i < 64; i++) {
                                            board[i] = " ";
                                        }
                                        for (int i = 0; i < 8; i++) {
                                            board[i] = player1Piece;
                                            board[8 * 1 + i] = player1Piece;
                                            board[8 * 7 + i] = player2Piece;
                                        }
                                    }

                                    static void displayBoard() {
                                        for (int i = 0; i < 64; i++) {
                                            System.out.print(board[i] + " ");
                                            if ((i + 1) % 8 == 0) {
                                                System.out.println();
                                            }
                                        }
                                    }

                                    static void move(int fromSquare, int toSquare) {
                                        board[toSquare] = board[fromSquare];
                                        board[fromSquare] = " ";
                                    }
                                }"""
    },
    {
        "ID": 17,
        "Domain": "Data Structures",
        "Purpose": "Binary Search Tree",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. BinarySearchTree.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 TreeNode.
                            02 Value PIC 999.
                            02 LeftChild USAGE POINTER.
                            02 RightChild USAGE POINTER.
                        01 RootNode USAGE POINTER.
                        PROCEDURE DIVISION.
                        P1.
                            PERFORM VARYING I FROM 1 BY 1 UNTIL I > 10
                                CALL "INSERT_NODE" USING I
                            END-PERFORM
                            CALL "DISPLAY_IN_ORDER" USING RootNode
                            STOP RUN.
                        INSERT_NODE USING NewValue.
                            CALL "ALLOCATE_NODE" USING NewNode
                            MOVE NewValue TO NewNode.Value
                            IF RootNode = NULL
                                MOVE NewNode TO RootNode
                            ELSE
                                PERFORM UNTIL CurrentNode = NULL
                                    MOVE CurrentNode TO ParentNode
                                    IF NewValue < CurrentNode.Value
                                        MOVE CurrentNode.LeftChild TO CurrentNode
                                    ELSE
                                        MOVE CurrentNode.RightChild TO CurrentNode
                                    END-IF
                                END-PERFORM
                                IF NewValue < ParentNode.Value
                                    MOVE NewNode TO ParentNode.LeftChild
                                ELSE
                                    MOVE NewNode TO ParentNode.RightChild
                                END-IF
                            END-IF
                            EXIT PROGRAM.
                        DISPLAY_IN_ORDER USING Node.
                            IF Node NOT = NULL
                                CALL "DISPLAY_IN_ORDER" USING Node.LeftChild
                                DISPLAY Node.Value
                                CALL "DISPLAY_IN_ORDER" USING Node.RightChild
                            END-IF
                            EXIT PROGRAM.
                        ALLOCATE_NODE USING NewNode.
                            CALL "WSOCK32.DLL" USING BY VALUE 21, BY VALUE SIZEOF TreeNode, BY REFERENCE NewNode
                            MOVE NULL TO NewNode.LeftChild
                            MOVE NULL TO NewNode.RightChild
                            EXIT PROGRAM.""",
        "Java Translation": """class TreeNode {
                                    int value;
                                    TreeNode leftChild, rightChild;

                                    TreeNode(int value) {
                                        this.value = value;
                                        leftChild = rightChild = null;
                                    }
                                }

                                public class BinarySearchTree {
                                    TreeNode root;

                                    BinarySearchTree() {
                                        root = null;
                                    }

                                    void insertNode(int newValue) {
                                        root = insertNodeRecursive(root, newValue);
                                    }

                                    TreeNode insertNodeRecursive(TreeNode root, int newValue) {
                                        if (root == null) {
                                            root = new TreeNode(newValue);
                                            return root;
                                        }
                                        if (newValue < root.value) {
                                            root.leftChild = insertNodeRecursive(root.leftChild, newValue);
                                        } else if (newValue > root.value) {
                                            root.rightChild = insertNodeRecursive(root.rightChild, newValue);
                                        }
                                        return root;
                                    }

                                    void displayInOrder(TreeNode node) {
                                        if (node != null) {
                                            displayInOrder(node.leftChild);
                                            System.out.print(node.value + " ");
                                            displayInOrder(node.rightChild);
                                        }
                                    }

                                    public static void main(String[] args) {
                                        BinarySearchTree bst = new BinarySearchTree();
                                        for (int i = 1; i <= 10; i++) {
                                            bst.insertNode(i);
                                        }
                                        bst.displayInOrder(bst.root);
                                    }
                                }"""
    },
    {
        "ID": 18,
        "Domain": "Network Programming",
        "Purpose": "TCP Client-Server Communication",
        "COBOL Code": """IDENTIFICATION DIVISION.
                        PROGRAM-ID. TCPClientServer.
                        DATA DIVISION.
                        WORKING-STORAGE SECTION.
                        01 ClientSocket USAGE POINTER.
                        01 ServerSocket USAGE POINTER.
                        01 InStream USAGE POINTER.
                        01 OutStream USAGE POINTER.
                        01 MessageToSend PIC X(255) VALUE "Hello, Server!".
                        01 ReceivedMessage PIC X(255).
                        PROCEDURE DIVISION.
                        P1.
                            CALL "CREATE_SERVER" USING BY REFERENCE ServerSocket
                            CALL "CREATE_CLIENT" USING BY REFERENCE ClientSocket
                            CALL "SEND_MESSAGE" USING BY REFERENCE ClientSocket BY REFERENCE OutStream
                            CALL "RECEIVE_MESSAGE" USING BY REFERENCE ServerSocket BY REFERENCE InStream
                            CALL "CLOSE_CLIENT" USING BY REFERENCE ClientSocket
                            CALL "CLOSE_SERVER" USING BY REFERENCE ServerSocket
                            STOP RUN.
                        CREATE_SERVER USING ServerSocket.
                            CALL "WSOCK32.DLL" USING BY VALUE 2, BY REFERENCE ServerSocket
                            CALL "WSOCK32.DLL" USING BY VALUE 20, BY VALUE "127.0.0.1", BY VALUE 8080, BY REFERENCE ServerSocket
                            CALL "WSOCK32.DLL" USING BY VALUE 3, BY REFERENCE ServerSocket, BY REFERENCE ServerSocket
                            EXIT PROGRAM.
                        CREATE_CLIENT USING ClientSocket.
                            CALL "WSOCK32.DLL" USING BY VALUE 2, BY REFERENCE ClientSocket
                            CALL "WSOCK32.DLL" USING BY VALUE 20, BY VALUE "127.0.0.1", BY VALUE 8080, BY REFERENCE ClientSocket
                            CALL "WSOCK32.DLL" USING BY VALUE 3, BY REFERENCE ClientSocket, BY REFERENCE ClientSocket
                            EXIT PROGRAM.
                        SEND_MESSAGE USING Socket, Stream.
                            CALL "WSOCK32.DLL" USING BY VALUE 1, BY REFERENCE MessageToSend, BY VALUE LENGTH OF MessageToSend, BY REFERENCE Stream
                            EXIT PROGRAM.
                        RECEIVE_MESSAGE USING Socket, Stream.
                            CALL "WSOCK32.DLL" USING BY VALUE 0, BY REFERENCE ReceivedMessage, BY VALUE LENGTH OF ReceivedMessage, BY REFERENCE Stream
                            DISPLAY ReceivedMessage
                            EXIT PROGRAM.
                        CLOSE_CLIENT USING Socket.
                            CALL "WSOCK32.DLL" USING BY VALUE 6, BY REFERENCE Socket
                            EXIT PROGRAM.
                        CLOSE_SERVER USING Socket.
                            CALL "WSOCK32.DLL" USING BY VALUE 6, BY REFERENCE Socket
                            EXIT PROGRAM.""",
        "Java Translation": """import java.io.*;
                                import java.net.*;

                                public class TCPClientServer {
                                    static final String SERVER_IP = "127.0.0.1";
                                    static final int SERVER_PORT = 8080;

                                    public static void main(String[] args) {
                                        try {
                                            Thread serverThread = new Thread(() -> {
                                                try {
                                                    ServerSocket serverSocket = new ServerSocket(SERVER_PORT);
                                                    Socket clientSocket = serverSocket.accept();
                                                    System.out.println("Client connected: " + clientSocket.getInetAddress());
                                                    BufferedReader inStream = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
                                                    String receivedMessage = inStream.readLine();
                                                    System.out.println("Received from client: " + receivedMessage);
                                                    PrintWriter outStream = new PrintWriter(clientSocket.getOutputStream(), true);
                                                    outStream.println("Hello, Client!");
                                                    inStream.close();
                                                    outStream.close();
                                                    clientSocket.close();
                                                    serverSocket.close();
                                                } catch (IOException e) {
                                                    e.printStackTrace();
                                                }
                                            });
                                            serverThread.start();

                                            Thread.sleep(1000); // Give the server some time to start

                                            Socket clientSocket = new Socket(SERVER_IP, SERVER_PORT);
                                            System.out.println("Connected to server: " + clientSocket.getInetAddress());
                                            BufferedReader inStream = new BufferedReader(new InputStreamReader(clientSocket.getInputStream()));
                                            PrintWriter outStream = new PrintWriter(clientSocket.getOutputStream(), true);
                                            outStream.println("Hello, Server!");
                                            String receivedMessage = inStream.readLine();
                                            System.out.println("Received from server: " + receivedMessage);
                                            inStream.close();
                                            outStream.close();
                                            clientSocket.close();
                                        } catch (IOException | InterruptedException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                }"""
    },

]

# file path for the CSV file
csv_file = "cobol_java_pairs.csv"

# Write to the CSV file
with open(csv_file, "w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=["ID", "Domain", "Purpose", "COBOL Code", "Java Translation"])
    writer.writeheader()
    for pair in cobol_java_pairs:
        writer.writerow(pair)

print(f"CSV file '{csv_file}' has been created successfully.")

CSV file 'cobol_java_pairs.csv' has been created successfully.


Reading the CSV file


In [9]:
import pandas as pd
df=pd.read_csv("/content/cobol_java_pairs.csv")
df

Unnamed: 0,ID,Domain,Purpose,COBOL Code,Java Translation
0,1,General,HelloWorld,IDENTIFICATION DIVISION.\n ...,public class HelloWorld {\n ...
1,2,Arithmetic,AddNumbers,IDENTIFICATION DIVISION.\n ...,public class AddNumbers {\n ...
2,3,Arithmetic,LargestNumber,IDENTIFICATION DIVISION.\n ...,public class LargestNumber {\n ...
3,4,Mathematical,Factorial,IDENTIFICATION DIVISION.\n ...,public class Factorial {\n ...
4,5,Mathematical,FibonacciSeries,IDENTIFICATION DIVISION.\n ...,public class FibonacciSeries {\n ...
5,6,File Handling,ReadFile,IDENTIFICATION DIVISION.\n ...,import java.io.BufferedReader;\n ...
6,7,String Manipulation,ReverseString,IDENTIFICATION DIVISION.\n ...,public class ReverseString {\n ...
7,8,Sorting,BubbleSort,IDENTIFICATION DIVISION.\n ...,public class BubbleSort {\n ...
8,9,Data Structures,Linked List,IDENTIFICATION DIVISION.\n ...,public class LinkedList {\n ...
9,10,Networking,TCP Client,IDENTIFICATION DIVISION.\n ...,import java.io.*;\n ...


# **Pre-processing the DATA of COBOL and JAVA code**

Pre-processing code defined inside the functions for COBOL and JAVA code

In [10]:
# Tokenize the code by splitting on whitespace and symbols
def tokenize_code(code):
    tokens = re.findall(r"[\w]+|[\S]", code)
    return tokens

# Remove single-line comments starting with "!" and multi-line comments enclosed in "*> ... <*"
def remove_comments(code):
    code = re.sub(r"!.*$", "", code, flags=re.MULTILINE)
    code = re.sub(r"\*>.*?<\*", "", code, flags=re.DOTALL)
    return code

# Normalize identifiers to lowercase
def normalize_identifiers(code):
    code = code.lower()
    return code

# Remove comments from the code, Tokenize the code, Normalize identifiers, Join tokens back into a single string
def preprocess_code(code):
    code = remove_comments(code)
    tokens = tokenize_code(code)
    code = normalize_identifiers(code)
    code = " ".join(tokens)
    return code

Pre-processing applied on COBOL and JAVA

In [11]:
df = pd.read_csv("/content/cobol_java_pairs.csv")

df["Preprocessed COBOL Code"] = df["COBOL Code"].apply(preprocess_code)
df["Preprocessed Java Translation"] = df["Java Translation"].apply(preprocess_code)


# **Creating the CSV file for Pre-Processed data**

In [13]:
# file path for the preprocessed CSV file and Write to the CSV file
csv_file_preprocessed = "cobol_java_pairs_preprocessed.csv"

df.to_csv(csv_file_preprocessed, index=False)
print(f"Preprocessed CSV file '{csv_file_preprocessed}' has been created successfully.")

Preprocessed CSV file 'cobol_java_pairs_preprocessed.csv' has been created successfully.


Reading the pre-processed CSV file

In [15]:
import pandas as pd
df_preprocessed=pd.read_csv("/content/cobol_java_pairs_preprocessed.csv")
df_preprocessed

Unnamed: 0,ID,Domain,Purpose,COBOL Code,Java Translation,Preprocessed COBOL Code,Preprocessed Java Translation
0,1,General,HelloWorld,IDENTIFICATION DIVISION.\n ...,public class HelloWorld {\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . Hello...,public class HelloWorld { public static void m...
1,2,Arithmetic,AddNumbers,IDENTIFICATION DIVISION.\n ...,public class AddNumbers {\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . AddNu...,public class AddNumbers { public static void m...
2,3,Arithmetic,LargestNumber,IDENTIFICATION DIVISION.\n ...,public class LargestNumber {\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . Large...,public class LargestNumber { public static voi...
3,4,Mathematical,Factorial,IDENTIFICATION DIVISION.\n ...,public class Factorial {\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . Facto...,public class Factorial { public static void ma...
4,5,Mathematical,FibonacciSeries,IDENTIFICATION DIVISION.\n ...,public class FibonacciSeries {\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . Fibon...,public class FibonacciSeries { public static v...
5,6,File Handling,ReadFile,IDENTIFICATION DIVISION.\n ...,import java.io.BufferedReader;\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . ReadF...,import java . io . BufferedReader ; import jav...
6,7,String Manipulation,ReverseString,IDENTIFICATION DIVISION.\n ...,public class ReverseString {\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . Rever...,public class ReverseString { public static voi...
7,8,Sorting,BubbleSort,IDENTIFICATION DIVISION.\n ...,public class BubbleSort {\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . Bubbl...,public class BubbleSort { public static void m...
8,9,Data Structures,Linked List,IDENTIFICATION DIVISION.\n ...,public class LinkedList {\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . Linke...,public class LinkedList { static class Node { ...
9,10,Networking,TCP Client,IDENTIFICATION DIVISION.\n ...,import java.io.*;\n ...,IDENTIFICATION DIVISION . PROGRAM - ID . TCPCl...,import java . io . * ; import java . net . * ;...


# **Get the number of unique tokens in the input(num_encoder_tokens)**

Converting text data into numerical sequence of input sentences(Tokenize the COBOL code)

In [19]:
from keras.layers import Dropout
from keras.optimizers import Adam

# Tokenize COBOL code
cobol_input_sentences=df_preprocessed["Preprocessed COBOL Code"]
tokenizer_cobol = Tokenizer()
tokenizer_cobol.fit_on_texts(df_preprocessed["Preprocessed COBOL Code"])
encoder_input_sequences = tokenizer_cobol.texts_to_sequences(df_preprocessed["Preprocessed COBOL Code"])

print(cobol_input_sentences)
print("*************************************************************************************************************************************************************************")
print(encoder_input_sequences)

0     IDENTIFICATION DIVISION . PROGRAM - ID . Hello...
1     IDENTIFICATION DIVISION . PROGRAM - ID . AddNu...
2     IDENTIFICATION DIVISION . PROGRAM - ID . Large...
3     IDENTIFICATION DIVISION . PROGRAM - ID . Facto...
4     IDENTIFICATION DIVISION . PROGRAM - ID . Fibon...
5     IDENTIFICATION DIVISION . PROGRAM - ID . ReadF...
6     IDENTIFICATION DIVISION . PROGRAM - ID . Rever...
7     IDENTIFICATION DIVISION . PROGRAM - ID . Bubbl...
8     IDENTIFICATION DIVISION . PROGRAM - ID . Linke...
9     IDENTIFICATION DIVISION . PROGRAM - ID . TCPCl...
10    IDENTIFICATION DIVISION . PROGRAM - ID . CRUDO...
11    IDENTIFICATION DIVISION . PROGRAM - ID . Banki...
12    IDENTIFICATION DIVISION . PROGRAM - ID . Linea...
13    IDENTIFICATION DIVISION . PROGRAM - ID . Simpl...
14    IDENTIFICATION DIVISION . PROGRAM - ID . Chess...
15    IDENTIFICATION DIVISION . PROGRAM - ID . Binar...
16    IDENTIFICATION DIVISION . PROGRAM - ID . TCPCl...
Name: Preprocessed COBOL Code, dtype: object
***

Pad the sequences to ensure equal lengths of input sequences

In [20]:
# Pad COBOL sequences
max_encoder_seq_length = max(len(seq) for seq in encoder_input_sequences)
encoder_input_data = pad_sequences(encoder_input_sequences, maxlen=max_encoder_seq_length, padding='post')

print(encoder_input_data)


[[35  7  9 ...  0  0  0]
 [35  7  9 ...  0  0  0]
 [35  7  9 ...  0  0  0]
 ...
 [35  7  9 ...  0  0  0]
 [35  7  9 ...  0  0  0]
 [35  7  9 ...  0  0  0]]


Calculating the encoder tokens of COBOL

In [22]:
# Get the number of unique tokens in COBOL vocabulary
num_encoder_tokens = len(tokenizer_cobol.word_index) + 1
print(num_encoder_tokens)

260


# **Get the number of unique tokens in the input(num_decoder_tokens)**

Converting text data into numerical sequence of input sentences of JAVA code

In [25]:
# Tokenize Java translation
java_input_sentences=df_preprocessed["Preprocessed Java Translation"]
tokenizer_java = Tokenizer()
tokenizer_java.fit_on_texts(df_preprocessed["Preprocessed Java Translation"])
decoder_input_sequences = tokenizer_java.texts_to_sequences(df_preprocessed["Preprocessed Java Translation"])

print(java_input_sentences)
print("*************************************************************************************************************************************************************************")
print(decoder_input_sequences)

0     public class HelloWorld { public static void m...
1     public class AddNumbers { public static void m...
2     public class LargestNumber { public static voi...
3     public class Factorial { public static void ma...
4     public class FibonacciSeries { public static v...
5     import java . io . BufferedReader ; import jav...
6     public class ReverseString { public static voi...
7     public class BubbleSort { public static void m...
8     public class LinkedList { static class Node { ...
9     import java . io . * ; import java . net . * ;...
10    import java . util . ArrayList ; import java ....
11    import java . util . Scanner ; public class Ba...
12    public class LinearRegression { public static ...
13    import java . io . * ; import java . net . * ;...
14    public class ChessGame { static String [ ] boa...
15    class TreeNode { int value ; TreeNode leftChil...
16    import java . io . * ; import java . net . * ;...
Name: Preprocessed Java Translation, dtype: obje

Pad the sequences to ensure equal lengths of target sequences

In [26]:
# Pad Java sequences
max_decoder_seq_length = max(len(seq) for seq in decoder_input_sequences)
decoder_input_data = pad_sequences(decoder_input_sequences, maxlen=max_decoder_seq_length, padding='post')

Calculating the decoder tokens of Java

In [27]:
# Get the number of unique tokens in Java vocabulary
num_decoder_tokens = len(tokenizer_java.word_index) + 1
print(num_decoder_tokens)

254


# **Initialize decoder target data**

In [28]:
# Initialize decoder target data
decoder_target_data = np.zeros((len(decoder_input_sequences), max_decoder_seq_length, num_decoder_tokens), dtype="float32")

# Populate decoder target data
for i, seq in enumerate(decoder_input_sequences):
    for t, token in enumerate(seq):
        decoder_target_data[i, t, token] = 1.0

# **Split the data into training and validation sets**

In [29]:
# Split the data into training and validation sets
encoder_input_train, encoder_input_val, decoder_input_train, decoder_input_val, decoder_target_train, decoder_target_val = train_test_split(
    encoder_input_data, decoder_input_data, decoder_target_data, test_size=0.2
)

# **Building the Neural Machine Translation model**

In [63]:
latent_dim = 254 # Define the dimensionality of the latent spac

In [64]:
# Define the encoder input layer
encoder_inputs = Input(shape=(None,))
encoder_embedding = Embedding(num_encoder_tokens, latent_dim)(encoder_inputs)
encoder = LSTM(latent_dim, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_embedding)
encoder_outputs = Dropout(0.5)(encoder_outputs)  # Adjust dropout rate as needed
encoder_states = [state_h, state_c]

In [65]:
# Adjust num_decoder_tokens to match the target data
num_decoder_tokens_target = len(tokenizer_java.word_index) + 1

In [66]:
# Define the decoder input layer
decoder_inputs = Input(shape=(None,))
decoder_embedding = Embedding(num_decoder_tokens, latent_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens_target, activation="softmax")
decoder_outputs = Dropout(0.2)(decoder_outputs)

In [67]:
# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

In [None]:
from keras.optimizers import Adam, RMSprop
optimizer1 = RMSprop(lr=0.001)

In [68]:
# Try different optimizers and learning rates
optimizer = Adam(lr=0.001)  # Adjust learning rate as needed
model.compile(optimizer=optimizer1, loss="categorical_crossentropy", metrics=["accuracy"])



In [70]:
# Train the model
translation_model=model.fit(
    [encoder_input_train, decoder_input_train],
    decoder_target_train,
    validation_data=([encoder_input_val, decoder_input_val], decoder_target_val),
    batch_size=64,
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# **Saving the model**

In [71]:
# Save the trained model
model.save("translation_model.h5")

  saving_api.save_model(


In [75]:
from tensorflow.keras.models import load_model

# Load the saved model
loaded_model = load_model("translation_model.h5")
loaded_model

<keras.src.engine.functional.Functional at 0x7ae6eb19cf40>