In [1]:
# Bootstrap R/rpy2 for this kernel by executing the setup script from the home
# directory. Per its recorded output, the script adds the r_env site-packages to
# the Python path, sets R_HOME, imports rpy2.robjects and activates the pandas
# integration, so it has to run before any cell that uses rpy2.
%run ~/setup_rpy2.py

Setting up R and rpy2 environment...
Added conda environment to Python path: /home/ec2-user/miniconda3/envs/r_env/lib/python3.8/site-packages
Set R_HOME to /home/ec2-user/miniconda3/envs/r_env/lib/R
rpy2 version: 3.5.11
Successfully imported rpy2.robjects
Basic R test: 1+1 = 2.0

R and rpy2 setup successful!

Example usage:
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
pandas2ri.activate()
result = ro.r('rnorm(10)')
print(result)

Pandas integration activated successfully!


In [2]:
# Print the directory that shell commands launched from this notebook run in.
!pwd

/home/ec2-user/SageMaker


In [3]:
import subprocess
import sys
from IPython.display import clear_output

def install_r_packages(package_list, verbose=False, env_name=None):
    """
    Install R packages from conda-forge using conda, with controlled output.

    Args:
        package_list: R package names, with or without the conda ``r-`` prefix
            (e.g. ``["MatchIt", "r-dplyr"]``). A single name passed as a plain
            string is treated as a one-element list.
        verbose: If True, echo conda's output line by line while it runs.
        env_name: Optional name of the conda environment to install into
            (passed to conda as ``-n``). With the default ``None`` no ``-n``
            option is added and conda chooses the target environment itself.

    Returns:
        bool: True if conda exited with status 0 (or there was nothing to
        install), False otherwise, including when conda could not be started.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(package_list, str):
        package_list = [package_list]
    package_list = list(package_list)

    if not package_list:
        # Do not launch conda without any package spec.
        print("No packages requested; nothing to install.")
        return True

    # Build conda package names: lower-case, with exactly one "r-" prefix.
    conda_packages = []
    for pkg in package_list:
        name = pkg.strip().lower()
        conda_packages.append(name if name.startswith("r-") else f"r-{name}")

    print(f"Installing packages: {', '.join(package_list)}...")

    cmd = ["conda", "install", "-y", "-c", "conda-forge"]
    if env_name:
        cmd += ["-n", env_name]
    cmd += conda_packages

    # Number of trailing output lines shown when the installation fails.
    tail_size = 10

    try:
        output_lines = []

        # stderr is merged into stdout and the single pipe is drained
        # continuously: with two separate pipes, the one nobody reads can fill
        # up and block conda forever. The context manager closes the pipe and
        # reaps the process even if reading fails.
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        ) as process:
            for raw_line in process.stdout:
                line = raw_line.strip()
                if not line:
                    continue
                output_lines.append(line)
                if verbose:
                    print(line)
            return_code = process.wait()

        # Clear previous output and show only the final status.
        clear_output(wait=True)

        if return_code == 0:
            print(f"✅ Successfully installed: {', '.join(package_list)}")
            return True

        print("❌ Installation failed with error:")
        print('\n'.join(output_lines[-tail_size:]))
        return False

    except Exception as e:
        # Covers failures to launch conda at all (e.g. conda not on PATH).
        print(f"❌ Error: {str(e)}")
        return False

In [4]:
# Install MatchIt through the conda helper defined above; the cell's displayed
# value is the helper's success flag.
install_r_packages(package_list=["matchit"])

✅ Successfully installed: matchit


True

In [5]:
%%bash
# Alternate way to install the required R packages: activate the r_env conda
# environment and install from conda-forge. Installer output is kept out of the
# notebook on success, but it is captured in a temporary log so that a failure
# can be diagnosed without editing and re-running the cell.
install_log="$(mktemp)"

if source ~/miniconda3/bin/activate && \
   conda activate r_env && \
   conda install -y -c conda-forge r-matchit r-rcpp r-rlang r-backports r-dplyr > "$install_log" 2>&1; then
  echo "✅ Packages installed successfully."
else
  echo "❌ Error: Package installation failed. Last lines of the installer output:"
  tail -n 20 "$install_log"
fi

rm -f "$install_log"

✅ Packages installed successfully.


In [6]:
# Now import and use MatchIt
import rpy2.robjects as ro
from rpy2.robjects.packages import importr
from rpy2.robjects import pandas2ri
import pandas as pd

# Enable rpy2's global pandas <-> R conversion. The setup script's recorded
# output reports that the pandas integration was already activated, so this call
# is presumably redundant but harmless — TODO confirm.
pandas2ri.activate()

# Load the R package MatchIt through rpy2; this needs one of the conda installs
# in the earlier cells to have succeeded.
matchit = importr('MatchIt')

# Small illustrative dataset: a binary treatment flag plus three covariates,
# six rows in total.
df = pd.DataFrame({
    'treatment': [1, 0, 1, 0, 1, 0],
    'age': [34, 22, 45, 25, 50, 27],
    'income': [100, 200, 150, 250, 120, 210],
    'education': [12, 16, 14, 13, 18, 15]
})

# Convert the pandas frame to its R counterpart explicitly before handing it to R.
r_df = pandas2ri.py2rpy(df)

# Match treated and control rows on all three covariates with method="nearest".
# NOTE(review): in the recorded output every treated row has distance 1.0 and
# every control row a distance of ~0, which looks like the two groups being
# separated perfectly on this tiny sample — presumably a toy-data artifact;
# verify with realistic data before interpreting the matches.
formula = ro.Formula('treatment ~ age + income + education')
match_result = matchit.matchit(formula, data=r_df, method="nearest")

# Extract the matched rows and print them as a pandas DataFrame; per the recorded
# output the result carries the extra columns distance, weights and subclass.
matched_data = matchit.match_data(match_result)
print("Matched data:")
print(pandas2ri.rpy2py(matched_data))

Matched data:
   treatment  age  income  education      distance  weights  subclass
0          1   34     100         12  1.000000e+00      1.0         1
1          0   22     200         16  2.220446e-16      1.0         3
2          1   45     150         14  1.000000e+00      1.0         2
3          0   25     250         13  8.043900e-13      1.0         2
4          1   50     120         18  1.000000e+00      1.0         3
5          0   27     210         15  7.890961e-11      1.0         1
