In [None]:
import javalang
import pandas as pd

# Function to extract features from a single Java file
def _literal_argument_value(argument):
    """Return the literal text of a call argument, or None if it is not a literal."""
    # Only Literal nodes are read here; arguments such as variable references
    # or nested calls are reported as None instead of raising AttributeError.
    if isinstance(argument, javalang.tree.Literal):
        return argument.value
    return None


def extract_features_from_file(file_path):
    """Extract crypto-API usage features from a single Java source file.

    The file is parsed with ``javalang`` and the resulting tree is walked.
    One record is produced for each node that is either

    * a method invocation whose member name is ``getInstance``, ``update``
      or ``digest``, or
    * a ``catch`` clause whose caught types include
      ``NoSuchAlgorithmException`` or ``NoSuchProviderException``.

    Args:
        file_path: Path of the Java source file to analyse.

    Returns:
        A list of dicts with the keys ``method``, ``algorithm``,
        ``provider``, ``exception_handled`` and ``file``. Fields that do not
        apply to a record are ``None``. An empty list is returned when the
        file cannot be parsed (the failure is printed, not raised).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    features = []
    # Explicit encoding keeps results independent of the platform default;
    # undecodable bytes are replaced so one odd file does not abort a batch.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        java_code = file.read()

    try:
        tree = javalang.parse.parse(java_code)
    except Exception as e:
        # Deliberate best-effort: report the unparsable file and move on.
        print(f"Failed to parse {file_path}: {e}")
        return []

    for _path, node in tree:
        feature = {
            'method': None,
            'algorithm': None,
            'provider': None,
            'exception_handled': None,
            'file': file_path
        }

        if isinstance(node, javalang.tree.MethodInvocation):
            if node.member == 'getInstance':
                feature['method'] = 'getInstance'
                arguments = node.arguments or []
                # NOTE(review): javalang appears to keep the surrounding
                # double quotes in string literal values -- confirm before
                # comparing against bare algorithm names.
                if arguments:
                    feature['algorithm'] = _literal_argument_value(arguments[0])
                    if len(arguments) > 1:
                        feature['provider'] = _literal_argument_value(arguments[1])
            elif node.member in ('update', 'digest'):
                feature['method'] = node.member
        elif isinstance(node, javalang.tree.CatchClause):
            # Compare on the simple class name so qualified names also match.
            exception_types = [
                str(t).split('.')[-1] for t in (node.parameter.types or [])
            ]
            if 'NoSuchAlgorithmException' in exception_types:
                feature['exception_handled'] = 'NoSuchAlgorithmException'
            elif 'NoSuchProviderException' in exception_types:
                feature['exception_handled'] = 'NoSuchProviderException'

        # 'file' is always populated, so it must be excluded from the check;
        # otherwise every node in the tree would yield a record.
        if any(
            value is not None
            for key, value in feature.items()
            if key != 'file'
        ):
            features.append(feature)

    return features

# Run the extractor over every Java file and pool the resulting records
all_features = []
for java_file in java_files:
    all_features += extract_features_from_file(java_file)

# Tabulate the pooled records and preview the first rows
df_features = pd.DataFrame(data=all_features)
print(df_features.head())
